# Print the regression summary table for the fitted model `lm_pre_alpha`.
summ(lm_pre_alpha)
MODEL INFO:
Observations: 24394 (18 missing obs. deleted)
Dependent Variable: sold_price
Type: OLS linear regression
MODEL FIT:
F(65,24328) = 37208.82, p = 0.00
R² = 0.99
Adj. R² = 0.99
Standard errors: OLS
-------------------------------------------------------------------------
Est. S.E. t val. p
----------------------------------- ----------- --------- -------- ------
(Intercept) -16471.89 9512.56 -1.73 0.08
property_typeDUP -1567.54 2875.46 -0.55 0.59
property_typeOTH -2779.57 2056.27 -1.35 0.18
property_typePAT -565.21 930.77 -0.61 0.54
property_typeSGL 1807.60 438.32 4.12 0.00
property_typeTNH 529.68 552.32 0.96 0.34
ac_typenone -55.39 381.27 -0.15 0.88
ac_typenot_central -1637.94 246.08 -6.66 0.00
list_price 0.98 0.00 895.29 0.00
patio1 833.04 126.89 6.57 0.00
school_general1 224.32 161.80 1.39 0.17
photo_count -34.93 7.63 -4.58 0.00
pool1 -157.09 211.72 -0.74 0.46
roof_typeother 1179.35 233.09 5.06 0.00
roof_typeshingle 1981.04 262.17 7.56 0.00
roof_typeslate 561.38 1115.28 0.50 0.61
gas_typenatural 4523.85 8545.09 0.53 0.60
gas_typenone 3936.67 8541.01 0.46 0.64
gas_typepropane -96.15 8741.58 -0.01 0.99
gas_typeunknown 3661.78 8540.06 0.43 0.67
out_building1 -490.46 137.74 -3.56 0.00
area_living -0.81 0.27 -2.97 0.00
land_acres -291.00 154.61 -1.88 0.06
appliances1 928.60 172.69 5.38 0.00
garage1 700.15 126.88 5.52 0.00
property_conditionnew -3450.47 785.55 -4.39 0.00
property_conditionother -354.50 169.05 -2.10 0.04
energy_efficient1 592.05 141.81 4.18 0.00
exterior_typemetal -44.83 402.78 -0.11 0.91
exterior_typeother 54.80 167.73 0.33 0.74
exterior_typevinyl 410.53 186.16 2.21 0.03
exterior_typewood -611.49 263.14 -2.32 0.02
exterior_featurescourtyard 2804.70 1467.81 1.91 0.06
exterior_featuresfence 1047.68 615.20 1.70 0.09
exterior_featuresnone 1600.42 616.39 2.60 0.01
exterior_featuresporch 1133.90 629.71 1.80 0.07
exterior_featurestennis_court 718.57 1727.07 0.42 0.68
fireplace1 329.21 131.35 2.51 0.01
foundation_typeslab 813.97 190.06 4.28 0.00
foundation_typeunspecified -244.66 228.76 -1.07 0.28
area_total -0.20 0.16 -1.26 0.21
beds_total1 -654.41 3179.87 -0.21 0.84
beds_total2 -1146.38 3149.18 -0.36 0.72
beds_total3 -501.07 3152.57 -0.16 0.87
beds_total4 352.34 3158.67 0.11 0.91
beds_total5 -459.18 3216.99 -0.14 0.89
bath_full1 2422.94 3359.73 0.72 0.47
bath_full2 2893.97 3359.50 0.86 0.39
bath_full3 2434.57 3367.54 0.72 0.47
bath_full4 -2224.48 3760.16 -0.59 0.55
bath_full6 -3834.27 9210.17 -0.42 0.68
bath_half1 -365.47 166.84 -2.19 0.03
bath_half2 -1666.56 1100.38 -1.51 0.13
bath_half3 1695.30 6037.98 0.28 0.78
bath_half4 8512.66 8544.34 1.00 0.32
bath_half5 -8585.74 4939.03 -1.74 0.08
age -37.65 3.75 -10.03 0.00
dom -8.30 1.08 -7.68 0.00
sold_date 0.29 0.06 4.58 0.00
sewer_typeseptic -291.81 237.10 -1.23 0.22
sewer_typeunspecified 268.03 129.53 2.07 0.04
property_stylenot_mobile 2238.58 353.84 6.33 0.00
subdivision1 392.59 151.73 2.59 0.01
water_typewell 550.97 600.38 0.92 0.36
waterfront1 -1629.56 225.69 -7.22 0.00
bottom25_dom1 2327.32 159.03 14.63 0.00
-------------------------------------------------------------------------
Note: Advisor suggested not to include interaction terms except for specific testing.
# Print the plot object stored in `a`.
a
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Arrange plots `b` and `c` in a two-row, one-column grid.
gridExtra::grid.arrange(
  b, c,
  nrow = 2,
  ncol = 1
)
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Living Area
# General graphing
# Sold price against living area: points and a smoother coloured by
# `infections_period`, plus a dashed grey smoother without the colour mapping.
a <- ggplot(data = data_factor, mapping = aes(x = area_living, y = sold_price)) +
  theme_minimal() +
  geom_point(mapping = aes(color = infections_period), alpha = 0.15) +
  geom_smooth(mapping = aes(color = infections_period)) +
  geom_smooth(linetype = "dashed", color = "grey50")
# Same layout as the plot above, but with sold price divided by living area
# on the y-axis.
ggplot(
  data = data_factor,
  mapping = aes(x = area_living, y = sold_price / area_living)
) +
  theme_minimal() +
  geom_point(mapping = aes(color = infections_period), alpha = 0.15) +
  geom_smooth(mapping = aes(color = infections_period)) +
  geom_smooth(linetype = "dashed", color = "grey50")
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Actual vs. fit
# Model with non-linear addition: every column of `data_factor_core` as a
# predictor plus a squared living-area term.
lm_pre_alpha_area <- lm(
  formula = sold_price ~ . + I(area_living^2),
  data = data_factor_core
)
summ(lm_pre_alpha_area)
MODEL INFO:
Observations: 24394 (18 missing obs. deleted)
Dependent Variable: sold_price
Type: OLS linear regression
MODEL FIT:
F(66,24327) = 36741.79, p = 0.00
R² = 0.99
Adj. R² = 0.99
Standard errors: OLS
-------------------------------------------------------------------------
Est. S.E. t val. p
----------------------------------- ----------- --------- -------- ------
(Intercept) -21671.88 9522.17 -2.28 0.02
property_typeDUP -1333.48 2871.86 -0.46 0.64
property_typeOTH -2804.77 2053.59 -1.37 0.17
property_typePAT -620.44 929.59 -0.67 0.50
property_typeSGL 1770.31 437.77 4.04 0.00
property_typeTNH 370.43 551.95 0.67 0.50
ac_typenone 62.25 381.06 0.16 0.87
ac_typenot_central -1498.05 246.37 -6.08 0.00
list_price 0.98 0.00 896.13 0.00
patio1 798.99 126.79 6.30 0.00
school_general1 241.58 161.60 1.49 0.13
photo_count -34.70 7.62 -4.55 0.00
pool1 -73.45 211.70 -0.35 0.73
roof_typeother 1098.57 233.01 4.71 0.00
roof_typeshingle 1920.08 261.94 7.33 0.00
roof_typeslate 536.02 1113.83 0.48 0.63
gas_typenatural 4855.78 8534.04 0.57 0.57
gas_typenone 4318.56 8530.00 0.51 0.61
gas_typepropane 87.56 8730.21 0.01 0.99
gas_typeunknown 3979.77 8529.01 0.47 0.64
out_building1 -490.59 137.56 -3.57 0.00
area_living 6.54 0.95 6.85 0.00
land_acres -285.71 154.41 -1.85 0.06
appliances1 921.60 172.47 5.34 0.00
garage1 666.84 126.78 5.26 0.00
property_conditionnew -3617.20 784.80 -4.61 0.00
property_conditionother -364.93 168.83 -2.16 0.03
energy_efficient1 601.45 141.63 4.25 0.00
exterior_typemetal 16.32 402.32 0.04 0.97
exterior_typeother 58.29 167.52 0.35 0.73
exterior_typevinyl 417.26 185.92 2.24 0.02
exterior_typewood -554.23 262.89 -2.11 0.04
exterior_featurescourtyard 2805.14 1465.90 1.91 0.06
exterior_featuresfence 1048.09 614.40 1.71 0.09
exterior_featuresnone 1584.20 615.59 2.57 0.01
exterior_featuresporch 1119.15 628.89 1.78 0.08
exterior_featurestennis_court 870.69 1724.92 0.50 0.61
fireplace1 264.36 131.42 2.01 0.04
foundation_typeslab 819.18 189.82 4.32 0.00
foundation_typeunspecified -213.55 228.49 -0.93 0.35
area_total -0.27 0.16 -1.71 0.09
beds_total1 -1072.82 3176.15 -0.34 0.74
beds_total2 -2553.13 3149.94 -0.81 0.42
beds_total3 -2327.60 3156.66 -0.74 0.46
beds_total4 -1389.50 3161.99 -0.44 0.66
beds_total5 -1954.03 3218.17 -0.61 0.54
bath_full1 3642.81 3358.78 1.08 0.28
bath_full2 3719.16 3356.69 1.11 0.27
bath_full3 3740.67 3367.07 1.11 0.27
bath_full4 -544.43 3761.07 -0.14 0.88
bath_full6 -3367.38 9198.35 -0.37 0.71
bath_half1 -274.20 167.01 -1.64 0.10
bath_half2 -1480.96 1099.19 -1.35 0.18
bath_half3 1451.22 6030.19 0.24 0.81
bath_half4 7762.17 8533.71 0.91 0.36
bath_half5 -8041.27 4933.05 -1.63 0.10
age -37.00 3.75 -9.87 0.00
dom -8.28 1.08 -7.66 0.00
sold_date 0.28 0.06 4.35 0.00
sewer_typeseptic -304.75 236.80 -1.29 0.20
sewer_typeunspecified 258.97 129.37 2.00 0.05
property_stylenot_mobile 2105.89 353.77 5.95 0.00
subdivision1 401.26 151.53 2.65 0.01
water_typewell 557.86 599.60 0.93 0.35
waterfront1 -1642.44 225.40 -7.29 0.00
bottom25_dom1 2331.25 158.82 14.68 0.00
I(area_living^2) -0.00 0.00 -8.04 0.00
-------------------------------------------------------------------------
# Model with single-variable fit: sold price regressed on living area only.
lm_pre_alpha_area_single <- lm(
  formula = sold_price ~ area_living,
  data = data_factor_core
)
summ(lm_pre_alpha_area_single)
MODEL INFO:
Observations: 24412
Dependent Variable: sold_price
Type: OLS linear regression
MODEL FIT:
F(1,24410) = 14244.19, p = 0.00
R² = 0.37
Adj. R² = 0.37
Standard errors: OLS
-------------------------------------------------------
Est. S.E. t val. p
----------------- ----------- --------- -------- ------
(Intercept) -20238.66 1644.55 -12.31 0.00
area_living 113.16 0.95 119.35 0.00
-------------------------------------------------------
# Marginal effects data frames (predicted sold price over `area_living`)
ggpredict_1 <- ggpredict(lm_pre_alpha, terms = "area_living") # total model
ggpredict_2 <- ggpredict(lm_pre_alpha_area, terms = "area_living") # non-linear addition
ggpredict_3 <- ggpredict(lm_pre_alpha_area_single, terms = "area_living") # single-variable fit
# Plots
# `b` overlays the smoothed actual data (grey) with the three model fits.
# Data frames are passed as `data =` explicitly because the first positional
# argument of geom_smooth() is `mapping`, not `data`.
b <- ggplot(data_factor_core, aes(x = area_living)) +
  geom_smooth(data = data_factor, mapping = aes(y = sold_price), color = "grey50") +
  geom_smooth(data = ggpredict_1, mapping = aes(x, predicted), linetype = "dashed", color = "darkred") +
  geom_smooth(data = ggpredict_2, mapping = aes(x, predicted), linetype = "dashed", color = "darkblue") +
  # Distinct colour so the single-variable fit is not confused with the
  # non-linear model (both were previously drawn in "darkblue").
  geom_smooth(data = ggpredict_3, mapping = aes(x, predicted), linetype = "dashed", color = "darkgreen")
# Look at area_living & area_living^2 alone to see impact on more relevant y-axis scale
c <- ggplot() +
  geom_smooth(data = ggpredict_1, mapping = aes(x, predicted), linetype = "dashed", color = "darkred") +
  geom_smooth(data = ggpredict_2, mapping = aes(x, predicted), linetype = "dashed", color = "darkblue")
# Conclusion
# Print the plot object stored in `a`.
a
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Arrange plots `b` and `c` in a two-row, one-column grid.
gridExtra::grid.arrange(
  b, c,
  nrow = 2,
  ncol = 1
)
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# General graphing
# Sold price against lot size: points and a smoother coloured by
# `infections_period`, plus a dashed grey smoother without the colour mapping.
ggplot(data = data_factor, mapping = aes(x = land_acres, y = sold_price)) +
  theme_minimal() +
  geom_point(mapping = aes(color = infections_period), alpha = 0.15) +
  geom_smooth(mapping = aes(color = infections_period)) +
  geom_smooth(linetype = "dashed", color = "grey50")
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Same layout as the land-acres plot, but with sold price divided by
# `land_acres` on the y-axis.
ggplot(
  data = data_factor,
  mapping = aes(x = land_acres, y = sold_price / land_acres)
) +
  theme_minimal() +
  geom_point(mapping = aes(color = infections_period), alpha = 0.15) +
  geom_smooth(mapping = aes(color = infections_period)) +
  geom_smooth(linetype = "dashed", color = "grey50")
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Additions
# Start from the core modelling data and add squared terms for age and living
# area. The squares are stored as plain numeric columns; I() is only needed
# inside a model formula and would otherwise tag the columns as "AsIs".
data_factor_core_clean <- data_factor_core
data_factor_core_clean$age_2 <- data_factor_core_clean$age^2
data_factor_core_clean$area_living_2 <- data_factor_core_clean$area_living^2
a
b
c
# Removals
# - Area_total
# - Listing price
data_factor_core_clean <- subset(data_factor_core_clean, select = -c(area_total, list_price))
# Drop observation (row) 23515. The trailing comma is required: with a single
# index a tibble/data frame selects COLUMNS, which is why `[-c(23515)]` failed
# with "Can't negate columns that don't exist".
# NOTE(review): the reason this particular row is excluded is not recorded
# here; confirm the row index is still correct if the data are re-ordered.
data_factor_core_clean <- data_factor_core_clean[-23515, ]
Error: Can't negate columns that don't exist.
x Location 23515 doesn't exist.
ℹ There are only 34 columns.
Backtrace:
1. data_factor_core_clean[-c(23515)]
2. tibble:::`[.tbl_df`(data_factor_core_clean, -c(23515))
3. tibble:::vectbl_as_col_location(...)
6. vctrs::vec_as_location(j, n, names)
8. vctrs:::stop_subscript_oob(...)
9. vctrs:::stop_subscript(...)
# Print the regression summary table for the fitted model `lm_alpha`.
summ(lm_alpha)
MODEL INFO:
Observations: 24393 (18 missing obs. deleted)
Dependent Variable: sold_price
Type: Linear regression
MODEL FIT:
χ²(64) = 117172996786096.98, p = 0.00
Pseudo-R² (Cragg-Uhler) = 0.66
Pseudo-R² (McFadden) = 0.04
AIC = 597055.98, BIC = 597590.71
Standard errors: MLE
--------------------------------------------------------------------------
Est. S.E. t val. p
----------------------------------- ----------- ---------- -------- ------
(Intercept) 7944.53 25040.74 0.32 0.75
property_typeDUP -49776.83 16818.97 -2.96 0.00
property_typeOTH 16195.71 12028.70 1.35 0.18
property_typePAT 15014.63 5441.44 2.76 0.01
property_typeSGL 21550.02 2545.76 8.47 0.00
property_typeTNH -3912.40 3231.68 -1.21 0.23
ac_typenone -44471.24 2213.16 -20.09 0.00
ac_typenot_central -14312.82 1441.33 -9.93 0.00
patio1 8218.69 739.05 11.12 0.00
school_general1 12474.85 944.70 13.21 0.00
photo_count 903.06 44.37 20.35 0.00
pool1 11856.09 1238.46 9.57 0.00
roof_typeother 3337.24 1359.27 2.46 0.01
roof_typeshingle 20790.72 1527.80 13.61 0.00
roof_typeslate 6522.07 6525.08 1.00 0.32
gas_typenone -33136.30 2147.25 -15.43 0.00
gas_typepropane -3375.37 11132.52 -0.30 0.76
gas_typeunknown -38807.07 2107.17 -18.42 0.00
out_building1 -5970.10 800.48 -7.46 0.00
area_living 42.15 5.43 7.76 0.00
land_acres 2513.39 898.79 2.80 0.01
appliances1 25181.42 999.29 25.20 0.00
garage1 12206.68 733.84 16.63 0.00
property_conditionnew -25781.04 4623.44 -5.58 0.00
property_conditionother -21654.41 977.66 -22.15 0.00
energy_efficient1 14013.73 823.53 17.02 0.00
exterior_typemetal -478.40 2356.87 -0.20 0.84
exterior_typeother 10423.23 979.05 10.65 0.00
exterior_typevinyl 4548.23 1088.81 4.18 0.00
exterior_typewood 3158.59 1539.38 2.05 0.04
exterior_featurescourtyard 36564.44 8587.97 4.26 0.00
exterior_featuresfence -25346.57 3594.93 -7.05 0.00
exterior_featuresnone -18900.07 3603.69 -5.24 0.00
exterior_featuresporch -25801.74 3682.07 -7.01 0.00
exterior_featurestennis_court 10302.14 10104.95 1.02 0.31
fireplace1 11754.15 768.42 15.30 0.00
foundation_typeslab 15254.06 1116.34 13.66 0.00
foundation_typeunspecified 8258.34 1340.03 6.16 0.00
beds_total1 -32233.49 18604.90 -1.73 0.08
beds_total2 -44404.19 18451.56 -2.41 0.02
beds_total3 -50134.64 18490.29 -2.71 0.01
beds_total4 -46519.73 18521.48 -2.51 0.01
beds_total5 -61554.10 18848.52 -3.27 0.00
bath_full1 -30516.77 19675.99 -1.55 0.12
bath_full2 -6674.87 19664.68 -0.34 0.73
bath_full3 16217.61 19725.77 0.82 0.41
bath_full4 12634.06 22034.17 0.57 0.57
bath_full6 18673.99 53893.54 0.35 0.73
bath_half1 12465.57 974.62 12.79 0.00
bath_half2 30380.88 6429.41 4.73 0.00
bath_half3 56559.91 35321.60 1.60 0.11
bath_half4 91519.65 49985.88 1.83 0.07
bath_half5 -56904.41 28892.95 -1.97 0.05
age -1886.46 63.76 -29.59 0.00
dom -20.28 6.33 -3.20 0.00
sold_date 4.26 0.38 11.15 0.00
sewer_typeseptic -6477.81 1388.39 -4.67 0.00
sewer_typeunspecified -4734.60 756.49 -6.26 0.00
property_stylenot_mobile 68303.77 2024.52 33.74 0.00
subdivision1 3626.89 887.21 4.09 0.00
water_typewell 2051.36 3510.88 0.58 0.56
waterfront1 19813.22 1313.09 15.09 0.00
bottom25_dom1 11493.99 928.88 12.37 0.00
age_2 16.76 0.81 20.76 0.00
area_living_2 0.01 0.00 3.78 0.00
--------------------------------------------------------------------------
Estimated dispersion parameter = 2490897637
# Waves of infection
# `infections_3mma` over sale date, restricted to 2020-2021, with a dashed
# smoother and a vertical marker at 2020-03-23.
# The y-axis limit reads the column from `data_factor` explicitly: outside
# aes() a bare column name is only found if the data frame is attached or a
# global of the same name exists.
ggplot(data_factor, aes(x = as.Date(sold_date), y = infections_3mma)) +
  geom_point(color = "grey35") +
  geom_smooth(linetype = "dashed", color = "gray46") +
  theme_minimal() +
  scale_x_date(limits = as.Date(c("2020-01-01", "2021-12-31"))) +
  scale_y_continuous(
    limits = c(0, max(data_factor$infections_3mma, na.rm = TRUE))
  ) +
  xlab(" ") +
  ylab("Confirmed Infections per Day") +
  labs(title = "Waves of Infection",
       caption = "") +
  geom_vline(xintercept = as.numeric(as.Date("2020-03-23")), linetype = 4)
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 17731 rows containing non-finite values (stat_smooth).
Warning: Removed 17731 rows containing missing values (geom_point).
Warning: Removed 3 rows containing missing values (geom_smooth).
# Accumulation of infections
# `infections_accum` (in thousands) over sale date, restricted to 2020-2021.
# I() is dropped from aes(): it is unnecessary for arithmetic there, and in
# ggplot2 >= 3.5.0 an I()-wrapped aesthetic bypasses the scale entirely.
# The y-axis limit reads the column from `data_factor` explicitly instead of
# relying on an attached data frame or a global variable.
ggplot(data_factor, aes(x = as.Date(sold_date), y = infections_accum / 1000)) +
  geom_point(color = "grey35") +
  geom_smooth(linetype = "dashed", color = "gray46") +
  theme_minimal() +
  scale_x_date(limits = as.Date(c("2020-01-01", "2021-12-31"))) +
  scale_y_continuous(
    limits = c(0, max(data_factor$infections_accum, na.rm = TRUE) / 1000)
  ) +
  xlab(" ") +
  ylab("Accumulation of Infections (in 000's)") +
  labs(title = "Accumulation of Infections",
       caption = "")
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 17731 rows containing non-finite values (stat_smooth).
Warning: Removed 17731 rows containing missing values (geom_point).
Warning: Removed 3 rows containing missing values (geom_smooth).
# Infections and home prices
# Smoothed sold price against `infections_3mma` (in thousands).
# I() is dropped from aes() (in ggplot2 >= 3.5.0 it bypasses the scale), and
# the x-axis limit reads the column from `data_factor` explicitly instead of
# relying on an attached data frame or a global variable.
ggplot(data_factor, aes(x = infections_3mma / 1000, y = sold_price)) +
  #geom_point() +
  geom_smooth(linetype = "dashed", color = "gray46") +
  theme_minimal() +
  scale_x_continuous(
    limits = c(0, max(data_factor$infections_3mma, na.rm = TRUE) / 1000)
  ) +
  xlab("3-Month Moving Average of Daily Infections (in 000's)") +
  ylab("Sold Price (Actual)") +
  labs(title = "Infections and Price",
       caption = "")
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Price on Infections
# Named palette colours reused by the plots in this section.
very_low <- "#460f5c"
low <- "#2c728e"
med <- "#27ad81"
high <- "#f4e61e"
# "#ff6c67", "#00c2c6"
# Violin + box plot of sold price for each level of `infections_period`.
# Only scale_fill_manual() is kept: the scale_fill_viridis() call that used to
# precede it was replaced by the manual scale anyway and only produced the
# "Scale for 'fill' is already present" message.
ggplot(data_factor, aes(x = infections_period, y = sold_price, fill = infections_period)) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c(very_low, med)) +
  coord_flip() +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)) +
  ggtitle("Comparison of Sold Price") +
  xlab("Infections Present (1 = yes)")
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
# Testing Corona
# Sold price regressed on `infections_3mma` plus every other column of the
# cleaned data set.
lm_corona <- lm(
  formula = sold_price ~ infections_3mma + .,
  data = data_factor_core_clean
)
summ(lm_corona)
MODEL INFO:
Observations: 24653 (19 missing obs. deleted)
Dependent Variable: sold_price
Type: OLS linear regression
MODEL FIT:
F(65,24587) = 740.89, p = 0.00
R² = 0.66
Adj. R² = 0.66
Standard errors: OLS
---------------------------------------------------------------------------
Est. S.E. t val. p
----------------------------------- ------------ ---------- -------- ------
(Intercept) 180768.74 57877.60 3.12 0.00
infections_3mma 9.67 0.55 17.53 0.00
property_typeDUP -51453.66 17405.89 -2.96 0.00
property_typeOTH 24615.51 12134.43 2.03 0.04
property_typePAT 16034.82 5606.46 2.86 0.00
property_typeSGL 22833.72 2625.02 8.70 0.00
property_typeTNH -3147.82 3330.86 -0.95 0.34
ac_typenone -45777.67 2288.13 -20.01 0.00
ac_typenot_central -13751.04 1482.97 -9.27 0.00
patio1 8117.83 761.13 10.67 0.00
school_general1 11371.01 979.63 11.61 0.00
photo_count 914.55 45.30 20.19 0.00
pool1 12939.44 1265.41 10.23 0.00
roof_typeother 3779.46 1402.59 2.69 0.01
roof_typeshingle 21166.81 1574.80 13.44 0.00
roof_typeslate 10025.75 6701.58 1.50 0.13
gas_typenatural -92572.23 51731.25 -1.79 0.07
gas_typenone -132478.82 51704.70 -2.56 0.01
gas_typepropane -105428.20 52920.81 -1.99 0.05
gas_typeunknown -137409.67 51698.24 -2.66 0.01
out_building1 -6076.37 823.42 -7.38 0.00
area_living 32.44 5.46 5.94 0.00
land_acres 2615.42 924.61 2.83 0.00
appliances1 24679.44 1030.47 23.95 0.00
garage1 11973.79 755.72 15.84 0.00
property_conditionnew -24640.48 4675.26 -5.27 0.00
property_conditionother -20596.44 1010.59 -20.38 0.00
energy_efficient1 14040.15 847.28 16.57 0.00
exterior_typemetal -37.33 2429.54 -0.02 0.99
exterior_typeother 11897.19 1007.73 11.81 0.00
exterior_typevinyl 5135.53 1122.19 4.58 0.00
exterior_typewood 3742.67 1586.37 2.36 0.02
exterior_featurescourtyard 34564.32 8523.08 4.06 0.00
exterior_featuresfence -32068.35 3626.83 -8.84 0.00
exterior_featuresnone -25089.98 3636.93 -6.90 0.00
exterior_featuresporch -32085.30 3718.60 -8.63 0.00
exterior_featurestennis_court -425.91 10424.65 -0.04 0.97
fireplace1 11695.55 792.13 14.76 0.00
foundation_typeslab 14759.89 1150.00 12.83 0.00
foundation_typeunspecified 8375.81 1382.82 6.06 0.00
beds_total1 -28431.74 19252.36 -1.48 0.14
beds_total2 -37258.25 19092.77 -1.95 0.05
beds_total3 -43522.59 19132.21 -2.27 0.02
beds_total4 -41182.09 19164.64 -2.15 0.03
beds_total5 -59183.00 19468.00 -3.04 0.00
bath_full1 -31961.75 20362.52 -1.57 0.12
bath_full2 -6980.82 20351.01 -0.34 0.73
bath_full3 19902.15 20411.88 0.98 0.33
bath_full4 22788.02 22563.01 1.01 0.31
bath_full6 20194.81 55765.74 0.36 0.72
bath_half1 14105.18 998.23 14.13 0.00
bath_half2 38562.72 6450.29 5.98 0.00
bath_half3 59379.75 36556.86 1.62 0.10
bath_half4 73612.69 51732.57 1.42 0.15
bath_half5 -61754.65 29897.51 -2.07 0.04
age -2017.22 65.30 -30.89 0.00
dom -61.39 5.76 -10.65 0.00
sold_date 0.50 0.46 1.08 0.28
sewer_typeseptic -6656.23 1430.73 -4.65 0.00
sewer_typeunspecified -5363.79 778.99 -6.89 0.00
property_stylenot_mobile 68394.40 2090.91 32.71 0.00
subdivision1 3395.40 912.56 3.72 0.00
water_typewell 1157.24 3603.27 0.32 0.75
waterfront1 20298.31 1342.88 15.12 0.00
age_2 18.30 0.83 22.16 0.00
area_living_2 0.01 0.00 6.16 0.00
---------------------------------------------------------------------------
# Coefficient tests for `lm_corona` using an HC0 covariance matrix from vcovHC().
# NOTE(review): `method = "White2"` looks like a plm-style argument; confirm it
# is not silently ignored by the vcovHC() method dispatched for lm objects.
coeftest(
  lm_corona,
  vcov = vcovHC(lm_corona, method = "White2", type = "HC0")
)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.8077e+05 3.3119e+04 5.4582 4.857e-08 ***
infections_3mma 9.6711e+00 5.7357e-01 16.8612 < 2.2e-16 ***
property_typeDUP -5.1454e+04 1.5523e+04 -3.3148 0.0009185 ***
property_typeOTH 2.4616e+04 1.4812e+04 1.6618 0.0965596 .
property_typePAT 1.6035e+04 5.5605e+03 2.8837 0.0039340 **
property_typeSGL 2.2834e+04 2.7157e+03 8.4081 < 2.2e-16 ***
property_typeTNH -3.1478e+03 3.3429e+03 -0.9416 0.3463877
ac_typenone -4.5778e+04 1.9684e+03 -23.2561 < 2.2e-16 ***
ac_typenot_central -1.3751e+04 1.5990e+03 -8.5998 < 2.2e-16 ***
patio1 8.1178e+03 7.7805e+02 10.4335 < 2.2e-16 ***
school_general1 1.1371e+04 1.0385e+03 10.9496 < 2.2e-16 ***
photo_count 9.1455e+02 4.8880e+01 18.7103 < 2.2e-16 ***
pool1 1.2939e+04 1.4018e+03 9.2307 < 2.2e-16 ***
roof_typeother 3.7795e+03 1.4476e+03 2.6109 0.0090365 **
roof_typeshingle 2.1167e+04 1.6503e+03 12.8259 < 2.2e-16 ***
roof_typeslate 1.0026e+04 9.8722e+03 1.0156 0.3098516
gas_typenatural -9.2572e+04 3.6053e+03 -25.6768 < 2.2e-16 ***
gas_typenone -1.3248e+05 2.4622e+03 -53.8049 < 2.2e-16 ***
gas_typepropane -1.0543e+05 1.8139e+04 -5.8123 6.237e-09 ***
gas_typeunknown -1.3741e+05 2.3502e+03 -58.4673 < 2.2e-16 ***
out_building1 -6.0764e+03 8.2748e+02 -7.3432 2.150e-13 ***
area_living 3.2442e+01 6.1755e+00 5.2533 1.506e-07 ***
land_acres 2.6154e+03 9.3633e+02 2.7933 0.0052217 **
appliances1 2.4679e+04 1.1339e+03 21.7658 < 2.2e-16 ***
garage1 1.1974e+04 7.7362e+02 15.4777 < 2.2e-16 ***
property_conditionnew -2.4640e+04 6.4509e+03 -3.8197 0.0001340 ***
property_conditionother -2.0596e+04 9.4498e+02 -21.7957 < 2.2e-16 ***
energy_efficient1 1.4040e+04 8.4263e+02 16.6623 < 2.2e-16 ***
exterior_typemetal -3.7329e+01 2.3648e+03 -0.0158 0.9874058
exterior_typeother 1.1897e+04 1.0773e+03 11.0436 < 2.2e-16 ***
exterior_typevinyl 5.1355e+03 1.1145e+03 4.6080 4.086e-06 ***
exterior_typewood 3.7427e+03 1.7848e+03 2.0970 0.0360022 *
exterior_featurescourtyard 3.4564e+04 1.4123e+04 2.4474 0.0143969 *
exterior_featuresfence -3.2068e+04 5.3581e+03 -5.9850 2.194e-09 ***
exterior_featuresnone -2.5090e+04 5.3651e+03 -4.6766 2.933e-06 ***
exterior_featuresporch -3.2085e+04 5.4215e+03 -5.9182 3.299e-09 ***
exterior_featurestennis_court -4.2591e+02 1.0542e+04 -0.0404 0.9677739
fireplace1 1.1696e+04 8.3445e+02 14.0158 < 2.2e-16 ***
foundation_typeslab 1.4760e+04 1.2931e+03 11.4146 < 2.2e-16 ***
foundation_typeunspecified 8.3758e+03 1.4303e+03 5.8559 4.806e-09 ***
beds_total1 -2.8432e+04 2.5251e+04 -1.1260 0.2601957
beds_total2 -3.7258e+04 2.5163e+04 -1.4807 0.1387039
beds_total3 -4.3523e+04 2.5227e+04 -1.7253 0.0844946 .
beds_total4 -4.1182e+04 2.5265e+04 -1.6300 0.1031127
beds_total5 -5.9183e+04 2.5704e+04 -2.3025 0.0213170 *
bath_full1 -3.1962e+04 2.4096e+04 -1.3264 0.1847120
bath_full2 -6.9808e+03 2.4086e+04 -0.2898 0.7719459
bath_full3 1.9902e+04 2.4179e+04 0.8231 0.4104509
bath_full4 2.2788e+04 3.0301e+04 0.7521 0.4520199
bath_full6 2.0195e+04 2.4906e+04 0.8108 0.4174683
bath_half1 1.4105e+04 1.1369e+03 12.4062 < 2.2e-16 ***
bath_half2 3.8563e+04 7.8980e+03 4.8826 1.054e-06 ***
bath_half3 5.9380e+04 1.0913e+04 5.4414 5.336e-08 ***
bath_half4 7.3613e+04 3.2038e+03 22.9767 < 2.2e-16 ***
bath_half5 -6.1755e+04 2.7625e+04 -2.2355 0.0253948 *
age -2.0172e+03 8.4747e+01 -23.8030 < 2.2e-16 ***
dom -6.1395e+01 5.7883e+00 -10.6067 < 2.2e-16 ***
sold_date 4.9735e-01 4.7529e-01 1.0464 0.2953845
sewer_typeseptic -6.6562e+03 1.4638e+03 -4.5472 5.463e-06 ***
sewer_typeunspecified -5.3638e+03 7.5612e+02 -7.0938 1.340e-12 ***
property_stylenot_mobile 6.8394e+04 1.7615e+03 38.8270 < 2.2e-16 ***
subdivision1 3.3954e+03 9.2014e+02 3.6901 0.0002247 ***
water_typewell 1.1572e+03 4.0744e+03 0.2840 0.7763914
waterfront1 2.0298e+04 1.5074e+03 13.4654 < 2.2e-16 ***
age_2 1.8303e+01 1.1918e+00 15.3579 < 2.2e-16 ***
area_living_2 8.9354e-03 1.7703e-03 5.0473 4.512e-07 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Visualizing marginal effect per positive tests on price
# Single-predictor model: sold price regressed on `infections_3mma` only.
lm_corona_single <- lm(
  formula = sold_price ~ infections_3mma,
  data = data_factor_core_clean
)
summ(lm_corona_single)
MODEL INFO:
Observations: 24672
Dependent Variable: sold_price
Type: OLS linear regression
MODEL FIT:
F(1,24670) = 1003.58, p = 0.00
R² = 0.04
Adj. R² = 0.04
Standard errors: OLS
----------------------------------------------------------
Est. S.E. t val. p
--------------------- ----------- -------- -------- ------
(Intercept) 162738.68 618.07 263.30 0.00
infections_3mma 21.53 0.68 31.68 0.00
----------------------------------------------------------
# Predicted sold price over `infections_3mma` from the controlled model and
# from the single-predictor model.
ggpredict_1 <- ggpredict(model = lm_corona, terms = "infections_3mma")
ggpredict_2 <- ggpredict(model = lm_corona_single, terms = "infections_3mma")
# Plots
ggplot(data = data_factor_core, mapping = aes(x = infections_3mma)) +
  ggtitle("Model Fit Overview") +
  # Actual Data
  geom_smooth(
    data = data_factor_core, mapping = aes(y = sold_price),
    color = "grey50"
  ) +
  # Controlled model
  geom_smooth(
    data = ggpredict_1, mapping = aes(x, predicted),
    linetype = "dashed", color = "darkred"
  ) +
  # Best single fit
  geom_smooth(
    data = ggpredict_2, mapping = aes(x, predicted),
    linetype = "dashed", color = "darkblue"
  )
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Predicting infections with house prices
# Reverse regression: `infections_3mma` explained by `sold_price` alone.
# (The duplicated `lm_flip <- lm_flip <-` assignment has been collapsed.)
lm_flip <- lm(infections_3mma ~ sold_price, data = data_factor)
summ(lm_flip)
MODEL INFO:
Observations: 24672
Dependent Variable: infections_3mma
Type: OLS linear regression
MODEL FIT:
F(1,24670) = 1003.58, p = 0.00
R² = 0.04
Adj. R² = 0.04
Standard errors: OLS
-------------------------------------------------
Est. S.E. t val. p
----------------- ------- ------- -------- ------
(Intercept) 92.28 11.06 8.34 0.00
sold_price 0.00 0.00 31.68 0.00
-------------------------------------------------
# Predicted `infections_3mma` over `sold_price` from the flipped model, drawn
# (dashed, dark red) on top of the smoothed actual data (grey).
# Data frames are passed as `data =` explicitly because the first positional
# argument of geom_smooth() is `mapping`, not `data`.
ggpredict_flip <- ggpredict(lm_flip, terms = "sold_price")
ggplot(data_factor, aes(x = sold_price)) +
  geom_smooth(data = data_factor, mapping = aes(y = infections_3mma), color = "grey50") +
  geom_smooth(data = ggpredict_flip, mapping = aes(x, predicted), linetype = "dashed", color = "darkred") +
  labs(title = "Flipped Regression", subtitle = "Explaining Infections using Variations in Price",
       caption = "")
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Distribution
# Find the mean of each group
# NOTE(review): attaching plyr this late can mask same-named functions from
# packages loaded earlier (e.g. dplyr, if it is in use) — confirm.
library(plyr)
# Convert via character: as.numeric() applied directly to a factor returns the
# internal level codes (1, 2, ...) rather than the bedroom counts stored in the
# level labels. The round trip is harmless if the column is already numeric.
data_factor$beds_total <- as.numeric(as.character(data_factor$beds_total))
room_mean <- ddply(data_factor, "infections_period", summarise,
                   beds_mean = mean(beds_total, na.rm = TRUE))
# Density of bedroom counts per infection period, with one dashed vertical
# line at each period's mean. The means are passed as constants so each line
# is drawn once (mapping a constant inside aes() with `data = room_mean` drew
# every line once per row of `room_mean`).
a <- ggplot(data_factor, aes(x = beds_total, fill = infections_period)) +
  geom_density(alpha = 0.5, position = "identity") +
  scale_fill_manual(values = c(very_low, med)) +
  labs(title = "Distribution of Number of Bedrooms") +
  geom_vline(xintercept = room_mean$beds_mean[2], linetype = "dashed", size = 0.4, color = very_low, alpha = 0.5) +
  geom_vline(xintercept = room_mean$beds_mean[1], linetype = "dashed", size = 0.4, alpha = 0.5) +
  xlab("Number of Bedrooms") +
  ylab("Density") +
  labs(fill = "Infection Period")
# Distribution of total price and number of beds
# Bedroom count is turned back into a factor so each count gets its own violin.
data_factor$beds_total <- as.factor(data_factor$beds_total)
b <- ggplot(
  data = subset(data_factor, !is.na(beds_total)),
  mapping = aes(x = beds_total, y = sold_price, fill = beds_total)
) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  scale_fill_viridis(discrete = TRUE, alpha = 0.6, option = "D") +
  #coord_flip() +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 14)
  ) +
  labs(
    title = "Distributions of Sold Price by Number of Bedrooms",
    caption = "",
    x = "Number of Bedrooms",
    y = "Sold Price"
  )
#+
#scale_fill_manual(values = c(very_low, med),
# name = "Infection Period",
# labels = c("Pre", "Post"))
# Distribution of price and number of beds before and after corona period
# Violins are filled by `infections_period`; box plots keep the plot-level
# fill by bedroom count.
# `alpha = 0.5` is set as a constant outside aes(): inside aes() it was
# treated as a mapped variable instead of a fixed transparency. The violin
# layer inherits the plot data, so the repeated `data =` argument is dropped.
c <- ggplot(data = subset(data_factor, !is.na(beds_total)), aes(x = beds_total, y = sold_price, fill = beds_total)) +
  geom_violin(mapping = aes(fill = infections_period), alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  scale_fill_viridis(discrete = TRUE, alpha = 0.6, option = "D") +
  #coord_flip() +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 14)) +
  labs(title = "Distributions of Sold Price by Number of Bedrooms",
       subtitle = "Price Pre vs. Post Infection Period",
       caption = "") +
  xlab("Number of Bedrooms") +
  ylab("Sold Price")
# Distribution of price per sqft. and number of beds
data_factor$beds_total <- as.factor(data_factor$beds_total)
d <- ggplot(
  data = subset(data_factor, !is.na(beds_total)),
  mapping = aes(x = beds_total, y = sold_price / area_living, fill = beds_total)
) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  scale_fill_viridis(discrete = TRUE, alpha = 0.6, option = "D") +
  #coord_flip() +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 14)
  ) +
  labs(
    title = "Distributions of Sold Price by Number of Bedrooms",
    subtitle = "Sold Price Per Sqft.",
    caption = "",
    x = "Number of Bedrooms",
    y = "Sold Price per Sqft."
  )
# Distribution of price per sqft. and number of beds before and after corona period
# Violins are filled by `infections_period`; box plots keep the plot-level
# fill by bedroom count.
# `alpha = 0.5` is set as a constant outside aes(): inside aes() it was
# treated as a mapped variable instead of a fixed transparency. The violin
# layer inherits the plot data, so the repeated `data =` argument is dropped.
e <- ggplot(data = subset(data_factor, !is.na(beds_total)), aes(x = beds_total, y = sold_price / area_living, fill = beds_total)) +
  geom_violin(mapping = aes(fill = infections_period), alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  scale_fill_viridis(discrete = TRUE, alpha = 0.6, option = "D") +
  #coord_flip() +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 14)) +
  labs(title = "Distributions of Sold Price by Number of Bedrooms", subtitle = "Sold Price Per Sqft. Pre vs. Post Infection Period",
       caption = "") +
  xlab("Number of Bedrooms") +
  ylab("Sold Price per Sqft.")
# Draw plots `a` through `e` one after another, each with its own
# grid.arrange() call.
invisible(lapply(list(a, b, c, d, e), gridExtra::grid.arrange))
#gridExtra::grid.arrange(b,c, ncol = 2)
Ideas
# Coefficient tests with heteroskedasticity-consistent (White, HC0) standard errors.
# `method = "White2"` was dropped: it is not a vcovHC() argument for lm fits and
# was silently ignored, so the estimator is unchanged.
coeftest(lm_corona_bedrooms, vcov = vcovHC(lm_corona_bedrooms, type = "HC0"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.4776e+05 2.9950e+04 8.2726 < 2.2e-16 ***
ac_typenone -5.6116e+04 1.9719e+03 -28.4581 < 2.2e-16 ***
ac_typenot_central -2.1396e+04 1.7984e+03 -11.8972 < 2.2e-16 ***
patio1 1.2550e+04 8.6935e+02 14.4358 < 2.2e-16 ***
school_general1 9.5793e+03 1.1390e+03 8.4102 < 2.2e-16 ***
photo_count 1.3164e+03 5.2185e+01 25.2250 < 2.2e-16 ***
pool1 2.1208e+04 1.5874e+03 13.3605 < 2.2e-16 ***
roof_typeother 7.7967e+03 1.5121e+03 5.1561 2.541e-07 ***
roof_typeshingle 2.8462e+04 1.7549e+03 16.2185 < 2.2e-16 ***
roof_typeslate 1.8896e+04 9.9617e+03 1.8969 0.0578545 .
gas_typenatural -1.0107e+05 3.6709e+03 -27.5316 < 2.2e-16 ***
gas_typenone -1.4202e+05 2.3211e+03 -61.1860 < 2.2e-16 ***
gas_typepropane -9.9809e+04 1.8136e+04 -5.5034 3.763e-08 ***
gas_typeunknown -1.4162e+05 2.1590e+03 -65.5982 < 2.2e-16 ***
out_building1 -5.8258e+03 9.2193e+02 -6.3192 2.675e-10 ***
appliances1 2.5486e+04 1.2330e+03 20.6704 < 2.2e-16 ***
property_conditionnew -2.3228e+04 6.7244e+03 -3.4543 0.0005527 ***
property_conditionother -2.1236e+04 1.0801e+03 -19.6609 < 2.2e-16 ***
energy_efficient1 1.9237e+04 9.2813e+02 20.7269 < 2.2e-16 ***
exterior_typemetal -3.9464e+03 2.5047e+03 -1.5756 0.1151323
exterior_typeother 1.4494e+04 1.2073e+03 12.0049 < 2.2e-16 ***
exterior_typevinyl 3.2328e+03 1.2554e+03 2.5751 0.0100260 *
exterior_typewood 2.1060e+03 1.9850e+03 1.0609 0.2887255
exterior_featurescourtyard 3.7099e+04 1.4742e+04 2.5166 0.0118550 *
exterior_featuresfence -3.2078e+04 5.9856e+03 -5.3593 8.430e-08 ***
exterior_featuresnone -2.2204e+04 5.9969e+03 -3.7026 0.0002139 ***
exterior_featuresporch -2.8730e+04 6.0661e+03 -4.7361 2.191e-06 ***
exterior_featurestennis_court 1.2094e+04 1.3905e+04 0.8698 0.3844197
fireplace1 3.2917e+04 8.6870e+02 37.8919 < 2.2e-16 ***
foundation_typeslab 2.0097e+04 1.3612e+03 14.7636 < 2.2e-16 ***
foundation_typeunspecified 9.3882e+03 1.5132e+03 6.2042 5.587e-10 ***
beds_total1 -6.9914e+04 2.9402e+04 -2.3779 0.0174185 *
beds_total2 -5.0169e+04 2.9144e+04 -1.7214 0.0851827 .
beds_total3 -2.4161e+04 2.9143e+04 -0.8291 0.4070831
beds_total4 1.7109e+04 2.9166e+04 0.5866 0.5574625
beds_total5 2.6766e+04 2.9743e+04 0.8999 0.3681774
age -2.2840e+03 8.5578e+01 -26.6889 < 2.2e-16 ***
dom -3.5794e+01 6.3773e+00 -5.6128 2.012e-08 ***
sewer_typeseptic -4.8724e+03 1.5741e+03 -3.0955 0.0019673 **
sewer_typeunspecified -5.1675e+03 8.4585e+02 -6.1093 1.016e-09 ***
property_stylenot_mobile 7.3419e+04 1.8556e+03 39.5664 < 2.2e-16 ***
subdivision1 2.4097e+03 1.0162e+03 2.3712 0.0177365 *
water_typewell -1.7269e+03 4.7569e+03 -0.3630 0.7165787
waterfront1 2.9108e+04 1.7101e+03 17.0217 < 2.2e-16 ***
age_2 2.0720e+01 1.1683e+00 17.7342 < 2.2e-16 ***
data_factor$infections_3mma -2.7000e+01 1.5318e+01 -1.7626 0.0779746 .
beds_total1:data_factor$infections_3mma 2.3287e+01 1.5739e+01 1.4796 0.1390006
beds_total2:data_factor$infections_3mma 3.1473e+01 1.5368e+01 2.0480 0.0405680 *
beds_total3:data_factor$infections_3mma 3.5902e+01 1.5330e+01 2.3420 0.0191906 *
beds_total4:data_factor$infections_3mma 3.6758e+01 1.5373e+01 2.3911 0.0168045 *
beds_total5:data_factor$infections_3mma 4.5991e+01 1.6466e+01 2.7931 0.0052244 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Find the mean sold price of each infection period
library(plyr)
price_means <- ddply(
  data_factor, "infections_period", summarise,
  price_mean = mean(sold_price, na.rm = TRUE)
)

# Distribution: Total
ggplot(data_factor, aes(x = sold_price)) +
  geom_density(alpha = 0.5, position = "identity", fill = very_low) +
  ggtitle("Price Distribution") +
  # The overall mean is a single constant, so it is passed as a parameter.
  # The previous layer used `data = price_means`, which has no sold_price
  # column, and had no NA handling.
  geom_vline(
    xintercept = mean(data_factor$sold_price, na.rm = TRUE),
    linetype = "dashed", size = 0.4, color = very_low, alpha = 0.8
  ) +
  xlab("Sold Price") +
  ylab("Density")
# Distribution: Infection
# Density of sold price per infection period, with a dashed line at each
# period's mean. Rows of price_means are addressed by position, as before.
# NOTE(review): assumes row 1 is the "Pre" period and row 2 the "Post" period - confirm.
ggplot(data_factor, aes(x = sold_price, fill = infections_period)) +
  geom_density(alpha = 0.5, position = "identity") +
  ggtitle("Price Distributions") +
  # Constants are passed as parameters: inside aes() with the 2-row
  # price_means frame, each line was drawn twice and its alpha compounded.
  geom_vline(
    xintercept = price_means[2, 2],
    linetype = "dashed", size = 0.4, color = med, alpha = 0.8
  ) +
  geom_vline(
    xintercept = price_means[1, 2],
    linetype = "dashed", size = 0.4, color = very_low, alpha = 0.8
  ) +
  # The scale name already sets the legend title; the extra labs(fill = ...)
  # was redundant.
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  ) +
  xlab("Sold Price") +
  ylab("Density")
# Distribution: Top vs. Bottom
# Price densities per infection period, faceted by the top-25% / bottom-25%
# price indicators.
ggplot(data_factor) +
  geom_density(
    aes(x = sold_price, fill = infections_period),
    alpha = 0.5, position = "identity"
  ) +
  facet_grid(vars(top25_sold_price, bottom25_sold_price), scales = "free") +
  ggtitle("Price Distributions") +
  xlab("Sold Price") +
  ylab("Density") +
  # Single fill scale: a second scale_fill_manual() replaced the first and
  # produced the "Scale for 'fill' is already present" message.
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  )
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
# Price and Infections
# Violin plus boxplot of sold price for each infection period (flipped axes).
ggplot(data_factor, aes(x = infections_period, y = sold_price, fill = infections_period)) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  # Only the manual fill scale is kept; the earlier scale_fill_viridis() was
  # replaced by it anyway and only triggered a "Scale for 'fill'" message.
  scale_fill_manual(values = c(very_low, med)) +
  coord_flip() +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  ) +
  ggtitle("Comparison of Sold Price") +
  xlab("Infection Period") +
  ylab("Sold Price")
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
# Coefficient tests with heteroskedasticity-consistent (White, HC0) standard errors.
# `method = "White2"` was dropped: it is not a vcovHC() argument for lm fits and
# was silently ignored, so the estimator is unchanged.
coeftest(lm_corona_price_bottom, vcov = vcovHC(lm_corona_price_bottom, type = "HC0"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.7857e+05 2.2480e+04 12.3920 < 2.2e-16 ***
property_typeDUP -2.1975e+04 1.6374e+04 -1.3421 0.1795756
property_typeOTH 1.4584e+04 1.2380e+04 1.1781 0.2387656
property_typePAT 1.0163e+04 4.9850e+03 2.0388 0.0414796 *
property_typeSGL 1.8552e+04 2.3641e+03 7.8474 4.420e-15 ***
property_typeTNH -4.4440e+03 3.0234e+03 -1.4699 0.1416099
ac_typenone -2.5552e+04 1.3619e+03 -18.7624 < 2.2e-16 ***
ac_typenot_central -3.3607e+03 1.2777e+03 -2.6302 0.0085391 **
patio1 4.1710e+03 6.7317e+02 6.1960 5.882e-10 ***
school_general1 7.7967e+03 8.8654e+02 8.7945 < 2.2e-16 ***
photo_count 5.6804e+02 4.0920e+01 13.8818 < 2.2e-16 ***
pool1 1.1680e+04 1.3071e+03 8.9360 < 2.2e-16 ***
roof_typeother -2.7074e+02 1.1928e+03 -0.2270 0.8204422
roof_typeshingle 1.1414e+04 1.4048e+03 8.1253 4.672e-16 ***
roof_typeslate 5.8497e+03 8.3957e+03 0.6967 0.4859696
gas_typenatural -6.6364e+04 3.2084e+03 -20.6848 < 2.2e-16 ***
gas_typenone -1.0699e+05 2.1142e+03 -50.6067 < 2.2e-16 ***
gas_typepropane -7.3202e+04 1.5062e+04 -4.8601 1.181e-06 ***
gas_typeunknown -1.0843e+05 2.0384e+03 -53.1917 < 2.2e-16 ***
out_building1 -6.6969e+03 7.1902e+02 -9.3140 < 2.2e-16 ***
area_living -1.8309e+01 5.5182e+00 -3.3179 0.0009081 ***
land_acres 1.6068e+03 7.6658e+02 2.0960 0.0360904 *
appliances1 1.0363e+04 8.8813e+02 11.6681 < 2.2e-16 ***
garage1 6.9436e+03 6.6603e+02 10.4254 < 2.2e-16 ***
property_conditionnew -9.6468e+03 5.8477e+03 -1.6497 0.0990228 .
property_conditionother -1.0477e+04 8.6301e+02 -12.1400 < 2.2e-16 ***
energy_efficient1 1.0733e+04 7.4567e+02 14.3938 < 2.2e-16 ***
exterior_typemetal -6.8533e+02 1.9199e+03 -0.3570 0.7211248
exterior_typeother 8.7693e+03 9.3197e+02 9.4094 < 2.2e-16 ***
exterior_typevinyl 2.1807e+03 9.6058e+02 2.2702 0.0232035 *
exterior_typewood 3.6714e+03 1.4550e+03 2.5232 0.0116338 *
exterior_featurescourtyard 2.3334e+04 1.2856e+04 1.8151 0.0695255 .
exterior_featuresfence -3.1847e+04 4.8489e+03 -6.5679 5.206e-11 ***
exterior_featuresnone -2.7200e+04 4.8484e+03 -5.6101 2.043e-08 ***
exterior_featuresporch -3.1926e+04 4.8992e+03 -6.5165 7.334e-11 ***
exterior_featurestennis_court -8.2497e+03 9.9486e+03 -0.8292 0.4069776
fireplace1 1.0616e+04 7.1079e+02 14.9359 < 2.2e-16 ***
foundation_typeslab 4.0988e+03 1.0575e+03 3.8759 0.0001065 ***
foundation_typeunspecified 2.1203e+03 1.1394e+03 1.8609 0.0627696 .
beds_total1 -6.9770e+03 2.1265e+04 -0.3281 0.7428376
beds_total2 -1.3761e+04 2.1173e+04 -0.6499 0.5157434
beds_total3 -2.2255e+04 2.1202e+04 -1.0497 0.2938691
beds_total4 -1.7414e+04 2.1228e+04 -0.8203 0.4120491
beds_total5 -3.5118e+04 2.1647e+04 -1.6223 0.1047492
bath_full1 -1.6852e+04 1.3865e+04 -1.2154 0.2242251
bath_full2 -8.9356e+03 1.3844e+04 -0.6454 0.5186491
bath_full3 1.6747e+04 1.3974e+04 1.1984 0.2307573
bath_full4 1.5693e+04 2.1486e+04 0.7304 0.4651680
bath_full6 4.0308e+04 1.4910e+04 2.7034 0.0068690 **
bath_half1 1.3388e+04 1.0236e+03 13.0790 < 2.2e-16 ***
bath_half2 3.0999e+04 8.0343e+03 3.8584 0.0001144 ***
bath_half3 5.9627e+04 8.9836e+03 6.6373 3.262e-11 ***
bath_half4 9.4038e+04 2.9045e+03 32.3760 < 2.2e-16 ***
bath_half5 -3.1271e+04 2.1658e+04 -1.4439 0.1487886
age -1.6200e+03 7.0930e+01 -22.8398 < 2.2e-16 ***
dom -4.0071e+01 4.9024e+00 -8.1738 3.132e-16 ***
sewer_typeseptic -6.3326e+03 1.2071e+03 -5.2461 1.567e-07 ***
sewer_typeunspecified -4.9833e+03 6.5197e+02 -7.6433 2.194e-14 ***
property_stylenot_mobile 2.8897e+04 1.5948e+03 18.1201 < 2.2e-16 ***
subdivision1 2.2100e+03 7.7041e+02 2.8686 0.0041258 **
water_typewell 2.8814e+03 3.4386e+03 0.8379 0.4020708
waterfront1 1.7854e+04 1.3436e+03 13.2880 < 2.2e-16 ***
age_2 1.4555e+01 9.7616e-01 14.9108 < 2.2e-16 ***
area_living_2 1.9719e-02 1.6137e-03 12.2194 < 2.2e-16 ***
data_factor$infections_3mma 8.6517e+00 4.9113e-01 17.6161 < 2.2e-16 ***
bottom25_sold_price -7.9843e+04 8.2900e+02 -96.3119 < 2.2e-16 ***
data_factor$infections_3mma:bottom25_sold_price -6.2653e+00 7.9397e-01 -7.8911 3.120e-15 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Conditional Mean: mean property age per infection period
library(plyr)
age_mean_data <- ddply(
  data_factor, "infections_period", summarise,
  age_mean = mean(age, na.rm = TRUE)
)

# Distribution: Total
ggplot(data_factor, aes(x = age)) +
  geom_density(alpha = 0.5, position = "identity", fill = very_low) +
  ggtitle("Age Distribution") +
  # The overall mean is one constant, so it is passed as a parameter. Inside
  # aes() the same line was drawn once per data row, which defeats alpha.
  # na.rm matches the group means above.
  geom_vline(
    xintercept = mean(data_factor$age, na.rm = TRUE),
    linetype = "dashed", size = 0.4, alpha = 0.5, color = very_low
  ) +
  xlab("Age of Property") +
  ylab("Density")
# Distribution: Infection
# Density of property age per infection period, with a dashed line at each
# period's mean. Rows of age_mean_data are addressed by position, as before.
# NOTE(review): assumes row 1 is the "Pre" period and row 2 the "Post" period - confirm.
ggplot(data_factor, aes(x = age, fill = infections_period)) +
  geom_density(alpha = 0.5, position = "identity") +
  ggtitle("Age Distributions") +
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  ) +
  # Constants are passed as parameters: inside aes() with the 2-row
  # age_mean_data frame, each line was drawn twice and its alpha compounded.
  geom_vline(
    xintercept = age_mean_data[2, 2],
    linetype = "dashed", size = 0.5, color = med, alpha = 0.8
  ) +
  geom_vline(
    xintercept = age_mean_data[1, 2],
    linetype = "dashed", size = 0.5, alpha = 0.8, color = very_low
  ) +
  xlab("Age of Property") +
  ylab("Density")
# ?scale_fill_discrete  # interactive help lookup; run it from the console instead
# Distribution: Top vs. Bottom
# Age densities per infection period, one facet row per combination of the
# top-25% / bottom-25% age indicators.
ggplot(data_factor) +
  geom_density(
    mapping = aes(x = age, fill = infections_period),
    alpha = 0.5,
    position = "identity"
  ) +
  facet_grid(rows = vars(top25_age, bottom25_age), scales = "free") +
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  ) +
  labs(
    title = "Age Distributions",
    fill = "Infection Period",
    x = "Age of Property",
    y = "Density"
  )
# Age on Infections
# Violin plus boxplot of property age for each infection period (flipped axes).
ggplot(data_factor, aes(x = infections_period, y = age, fill = infections_period)) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  # Only the manual fill scale is kept; the earlier scale_fill_viridis() was
  # replaced by it anyway and only triggered a "Scale for 'fill'" message.
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  ) +
  coord_flip() +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 14)
  ) +
  ggtitle("Comparison of Age") +
  xlab("Infection Period") +
  ylab("Age of Property")
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
# Coefficient tests with heteroskedasticity-consistent (White, HC0) standard errors.
# `method = "White2"` was dropped: it is not a vcovHC() argument for lm fits and
# was silently ignored, so the estimator is unchanged.
coeftest(lm_corona_age_bottom, vcov = vcovHC(lm_corona_age_bottom, type = "HC0"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.3341e+05 3.2081e+04 4.1586 3.213e-05 ***
ac_typenone -4.5686e+04 1.9758e+03 -23.1223 < 2.2e-16 ***
ac_typenot_central -1.3725e+04 1.6034e+03 -8.5599 < 2.2e-16 ***
patio1 8.8777e+03 7.8834e+02 11.2611 < 2.2e-16 ***
school_general1 1.2233e+04 1.0519e+03 11.6291 < 2.2e-16 ***
photo_count 8.3004e+02 4.9235e+01 16.8587 < 2.2e-16 ***
pool1 9.7246e+03 1.4036e+03 6.9282 4.368e-12 ***
roof_typeother 3.4687e+03 1.4601e+03 2.3756 0.0175271 *
roof_typeshingle 2.2548e+04 1.6702e+03 13.5004 < 2.2e-16 ***
roof_typeslate 1.0875e+04 9.8409e+03 1.1051 0.2691232
gas_typenatural -8.5473e+04 3.7766e+03 -22.6323 < 2.2e-16 ***
gas_typenone -1.2673e+05 2.5713e+03 -49.2845 < 2.2e-16 ***
gas_typepropane -9.7511e+04 1.8583e+04 -5.2472 1.557e-07 ***
gas_typeunknown -1.3010e+05 2.4694e+03 -52.6863 < 2.2e-16 ***
out_building1 -6.6527e+03 8.3095e+02 -8.0061 1.236e-15 ***
land_acres 3.1612e+03 9.5672e+02 3.3042 0.0009539 ***
appliances1 2.5020e+04 1.1462e+03 21.8287 < 2.2e-16 ***
garage1 1.4086e+04 7.7556e+02 18.1622 < 2.2e-16 ***
property_conditionnew -6.1690e+03 6.6511e+03 -0.9275 0.3536702
property_conditionother -2.0673e+04 9.6156e+02 -21.4998 < 2.2e-16 ***
energy_efficient1 1.5373e+04 8.5713e+02 17.9354 < 2.2e-16 ***
exterior_typemetal -2.4707e+02 2.4198e+03 -0.1021 0.9186751
exterior_typeother 1.2851e+04 1.1016e+03 11.6659 < 2.2e-16 ***
exterior_typevinyl 5.8691e+03 1.1316e+03 5.1867 2.157e-07 ***
exterior_typewood 4.8121e+03 1.8249e+03 2.6370 0.0083702 **
exterior_featurescourtyard 4.5337e+04 1.5197e+04 2.9833 0.0028542 **
exterior_featuresfence -2.2494e+04 5.5120e+03 -4.0810 4.499e-05 ***
exterior_featuresnone -1.4390e+04 5.5240e+03 -2.6049 0.0091955 **
exterior_featuresporch -2.0275e+04 5.5771e+03 -3.6353 0.0002782 ***
exterior_featurestennis_court 8.8427e+03 1.0725e+04 0.8245 0.4096642
fireplace1 1.1842e+04 8.3865e+02 14.1201 < 2.2e-16 ***
foundation_typeslab 1.2592e+04 1.3040e+03 9.6567 < 2.2e-16 ***
foundation_typeunspecified 6.6525e+03 1.4485e+03 4.5926 4.400e-06 ***
beds_total1 -2.4476e+04 2.7523e+04 -0.8893 0.3738489
beds_total2 -2.5550e+04 2.7331e+04 -0.9348 0.3498804
beds_total3 -2.4087e+04 2.7328e+04 -0.8814 0.3781066
beds_total4 -2.0511e+04 2.7360e+04 -0.7497 0.4534551
beds_total5 -3.9284e+04 2.7797e+04 -1.4133 0.1575900
bath_full1 -3.8188e+04 2.4744e+04 -1.5433 0.1227657
bath_full2 -1.2393e+04 2.4737e+04 -0.5010 0.6163953
bath_full3 1.2200e+04 2.4816e+04 0.4916 0.6229929
bath_full4 1.3770e+04 3.0986e+04 0.4444 0.6567506
bath_full6 -7.2112e+03 2.5341e+04 -0.2846 0.7759828
bath_half1 1.2440e+04 1.1424e+03 10.8891 < 2.2e-16 ***
bath_half2 3.7417e+04 7.6533e+03 4.8890 1.020e-06 ***
bath_half3 6.4543e+04 8.3666e+03 7.7144 1.261e-14 ***
bath_half4 7.6590e+04 3.2113e+03 23.8501 < 2.2e-16 ***
bath_half5 -5.6216e+04 2.5008e+04 -2.2479 0.0245917 *
dom -6.2854e+01 5.8220e+00 -10.7959 < 2.2e-16 ***
sold_date 1.4337e+00 4.6958e-01 3.0531 0.0022670 **
sewer_typeseptic -6.4102e+03 1.4716e+03 -4.3560 1.330e-05 ***
sewer_typeunspecified -4.3032e+03 7.5897e+02 -5.6697 1.446e-08 ***
property_stylenot_mobile 6.9807e+04 1.7731e+03 39.3696 < 2.2e-16 ***
subdivision1 3.1875e+03 9.3573e+02 3.4065 0.0006592 ***
water_typewell 2.0497e+02 4.1549e+03 0.0493 0.9606545
waterfront1 2.0545e+04 1.5256e+03 13.4665 < 2.2e-16 ***
area_living_2 1.7648e-02 4.2033e-04 41.9858 < 2.2e-16 ***
data_factor$infections_3mma 9.0607e+00 7.2102e-01 12.5664 < 2.2e-16 ***
bottom25_age 2.5730e+04 9.6686e+02 26.6121 < 2.2e-16 ***
data_factor$infections_3mma:bottom25_age 1.5468e+00 9.1535e-01 1.6898 0.0910714 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Conditional Mean: mean living area per infection period
library(plyr)
area_living_mean_data <- ddply(
  data_factor, "infections_period", summarise,
  area_living_mean = mean(area_living, na.rm = TRUE)
)

# Distribution: Total
ggplot(data_factor, aes(x = area_living)) +
  geom_density(alpha = 0.5, position = "identity", fill = very_low) +
  ggtitle("Living Area Distribution") +
  # The overall mean is one constant, so it is passed as a parameter. Inside
  # aes() the same line was drawn once per data row, which defeats alpha.
  # na.rm matches the group means above.
  geom_vline(
    xintercept = mean(data_factor$area_living, na.rm = TRUE),
    linetype = "dashed", size = 0.4, alpha = 0.5, color = very_low
  ) +
  xlab("Living Area") +
  ylab("Density")
# Distribution: Infection
# Density of living area per infection period, with a dashed line at each
# period's mean. Rows of area_living_mean_data are addressed by position, as before.
# NOTE(review): assumes row 1 is the "Pre" period and row 2 the "Post" period - confirm.
ggplot(data_factor, aes(x = area_living, fill = infections_period)) +
  geom_density(alpha = 0.5, position = "identity") +
  ggtitle("Living Area Distributions") +
  # Constants are passed as parameters: inside aes() with the 2-row summary
  # frame, each line was drawn twice and its alpha compounded.
  geom_vline(
    xintercept = area_living_mean_data[2, 2],
    linetype = "dashed", size = 0.5, color = med, alpha = 0.8
  ) +
  geom_vline(
    xintercept = area_living_mean_data[1, 2],
    linetype = "dashed", size = 0.5, alpha = 0.8, color = very_low
  ) +
  xlab("Living Area") +
  ylab("Density") +
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  )
# Distribution: Top vs. Bottom
# Living-area densities per infection period, one facet row per combination
# of the top-25% / bottom-25% living-area indicators.
ggplot(data_factor) +
  geom_density(
    mapping = aes(x = area_living, fill = infections_period),
    alpha = 0.5,
    position = "identity"
  ) +
  facet_grid(rows = vars(top25_area_living, bottom25_area_living), scales = "free") +
  labs(
    title = "Living Area Distributions",
    x = "Living Area",
    y = "Density"
  ) +
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  )
# area_living on Infections
# Violin plus boxplot of living area for each infection period (flipped axes).
ggplot(data_factor, aes(x = infections_period, y = area_living, fill = infections_period)) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  # Only the manual fill scale is kept; the earlier scale_fill_viridis() was
  # replaced by it anyway and only triggered a "Scale for 'fill'" message.
  scale_fill_manual(values = c(very_low, med)) +
  coord_flip() +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  ) +
  ggtitle("Comparison of Living Area") +
  xlab("Infection Period") +
  ylab("Living Area")
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
# Coefficient tests with heteroskedasticity-consistent (White, HC0) standard errors.
# `method = "White2"` was dropped: it is not a vcovHC() argument for lm fits and
# was silently ignored, so the estimator is unchanged.
coeftest(lm_corona_area_living_bottom, vcov = vcovHC(lm_corona_area_living_bottom, type = "HC0"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.1339e+05 3.3909e+04 6.2930 3.167e-10 ***
property_typeDUP -4.1450e+04 1.3956e+04 -2.9700 0.002980 **
property_typeOTH 3.2243e+04 1.5969e+04 2.0191 0.043490 *
property_typePAT 1.6802e+04 6.1298e+03 2.7411 0.006129 **
property_typeSGL 3.1499e+04 2.8575e+03 11.0232 < 2.2e-16 ***
property_typeTNH -1.0751e+03 3.6228e+03 -0.2968 0.766651
ac_typenone -4.5749e+04 1.9656e+03 -23.2750 < 2.2e-16 ***
ac_typenot_central -1.3048e+04 1.6571e+03 -7.8737 3.585e-15 ***
patio1 8.2691e+03 8.1088e+02 10.1977 < 2.2e-16 ***
school_general1 1.0025e+04 1.0796e+03 9.2861 < 2.2e-16 ***
photo_count 1.0654e+03 5.1381e+01 20.7345 < 2.2e-16 ***
pool1 1.7630e+04 1.4758e+03 11.9461 < 2.2e-16 ***
roof_typeother 5.8738e+03 1.4607e+03 4.0213 5.805e-05 ***
roof_typeshingle 2.4781e+04 1.6803e+03 14.7482 < 2.2e-16 ***
roof_typeslate 1.5016e+04 1.0401e+04 1.4437 0.148832
gas_typenatural -7.2088e+04 3.6703e+03 -19.6407 < 2.2e-16 ***
gas_typenone -1.1453e+05 2.5305e+03 -45.2579 < 2.2e-16 ***
gas_typepropane -8.4275e+04 1.7473e+04 -4.8232 1.421e-06 ***
gas_typeunknown -1.1604e+05 2.4100e+03 -48.1506 < 2.2e-16 ***
out_building1 -5.3839e+03 8.6629e+02 -6.2148 5.220e-10 ***
land_acres 5.1387e+03 9.7520e+02 5.2694 1.380e-07 ***
appliances1 2.3783e+04 1.1649e+03 20.4166 < 2.2e-16 ***
garage1 1.3445e+04 8.0352e+02 16.7324 < 2.2e-16 ***
property_conditionnew -2.4761e+04 6.4570e+03 -3.8347 0.000126 ***
property_conditionother -2.0736e+04 9.9837e+02 -20.7701 < 2.2e-16 ***
energy_efficient1 1.4537e+04 8.7617e+02 16.5912 < 2.2e-16 ***
exterior_typemetal -1.8255e+03 2.3499e+03 -0.7768 0.437268
exterior_typeother 1.2592e+04 1.1204e+03 11.2388 < 2.2e-16 ***
exterior_typevinyl 3.1157e+03 1.1613e+03 2.6830 0.007302 **
exterior_typewood 3.0180e+03 1.8587e+03 1.6237 0.104449
exterior_featurescourtyard 3.7328e+04 1.3936e+04 2.6785 0.007400 **
exterior_featuresfence -3.2656e+04 5.6094e+03 -5.8217 5.897e-09 ***
exterior_featuresnone -2.5255e+04 5.6117e+03 -4.5004 6.814e-06 ***
exterior_featuresporch -3.2058e+04 5.6736e+03 -5.6505 1.618e-08 ***
exterior_featurestennis_court 5.4193e+03 1.2188e+04 0.4447 0.656574
fireplace1 2.0496e+04 8.3541e+02 24.5339 < 2.2e-16 ***
foundation_typeslab 1.3667e+04 1.3123e+03 10.4143 < 2.2e-16 ***
foundation_typeunspecified 7.7983e+03 1.4477e+03 5.3867 7.242e-08 ***
beds_total1 -2.4930e+04 2.6346e+04 -0.9463 0.344019
beds_total2 -2.8425e+04 2.6173e+04 -1.0860 0.277468
beds_total3 -3.1603e+04 2.6194e+04 -1.2065 0.227639
beds_total4 -1.3294e+04 2.6225e+04 -0.5069 0.612212
beds_total5 -1.7741e+04 2.6701e+04 -0.6644 0.506431
bath_full1 -4.9882e+04 2.7936e+04 -1.7856 0.074177 .
bath_full2 -1.2439e+04 2.7934e+04 -0.4453 0.656094
bath_full3 4.0427e+04 2.8004e+04 1.4436 0.148852
bath_full4 5.4599e+04 3.4427e+04 1.5859 0.112770
bath_full6 3.2827e+04 2.8680e+04 1.1446 0.252383
bath_half1 2.8883e+04 1.1610e+03 24.8787 < 2.2e-16 ***
bath_half2 5.8058e+04 8.7735e+03 6.6175 3.729e-11 ***
bath_half3 5.9695e+04 1.3787e+04 4.3299 1.498e-05 ***
bath_half4 6.4025e+04 3.3800e+03 18.9422 < 2.2e-16 ***
bath_half5 -3.7625e+04 3.9698e+04 -0.9478 0.343252
age -1.9340e+03 8.6008e+01 -22.4866 < 2.2e-16 ***
dom -5.2607e+01 5.9970e+00 -8.7722 < 2.2e-16 ***
sold_date 6.8952e-01 4.9820e-01 1.3840 0.166363
sewer_typeseptic -7.5605e+03 1.5115e+03 -5.0021 5.710e-07 ***
sewer_typeunspecified -6.8537e+03 7.9062e+02 -8.6688 < 2.2e-16 ***
property_stylenot_mobile 6.8228e+04 1.7363e+03 39.2941 < 2.2e-16 ***
subdivision1 2.6809e+03 9.5201e+02 2.8161 0.004866 **
water_typewell 2.2071e+03 4.2381e+03 0.5208 0.602522
waterfront1 2.1468e+04 1.5887e+03 13.5129 < 2.2e-16 ***
age_2 1.7967e+01 1.1910e+00 15.0862 < 2.2e-16 ***
data_factor$infections_3mma 1.0638e+01 6.7630e-01 15.7293 < 2.2e-16 ***
bottom25_area_living -2.3386e+04 9.1376e+02 -25.5936 < 2.2e-16 ***
data_factor$infections_3mma:bottom25_area_living -3.8844e+00 8.7552e-01 -4.4366 9.178e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Conditional Mean: mean days on market per infection period
library(plyr)
dom_mean_data <- ddply(
  data_factor, "infections_period", summarise,
  dom_mean = mean(dom, na.rm = TRUE)
)

# Distribution: Total (all properties)
ggplot(data_factor, aes(x = dom)) +
  geom_density(alpha = 0.5, position = "identity", fill = very_low) +
  ggtitle("Days on Market Distribution") +
  # The overall mean is one constant, so it is passed as a parameter. Inside
  # aes() the same line was drawn once per data row, which defeats alpha.
  # na.rm matches the group means above.
  geom_vline(
    xintercept = mean(data_factor$dom, na.rm = TRUE),
    linetype = "dashed", size = 0.4, alpha = 0.5, color = very_low
  ) +
  xlab("Days on Market") +
  ylab("Density")
# Distribution: Infection
# Density of days on market per infection period, with a dashed line at each
# period's mean. Rows of dom_mean_data are addressed by position, as before.
# NOTE(review): assumes row 1 is the "Pre" period and row 2 the "Post" period - confirm.
ggplot(data_factor, aes(x = dom, fill = infections_period)) +
  geom_density(alpha = 0.5, position = "identity") +
  ggtitle("Days on Market Distributions") +
  # Constants are passed as parameters: inside aes() with the 2-row summary
  # frame, each line was drawn twice and its alpha compounded.
  geom_vline(
    xintercept = dom_mean_data[2, 2],
    linetype = "dashed", size = 0.5, color = med, alpha = 0.8
  ) +
  geom_vline(
    xintercept = dom_mean_data[1, 2],
    linetype = "dashed", size = 0.5, alpha = 0.8, color = very_low
  ) +
  xlab("Days on Market") +
  ylab("Density") +
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  )
# Distribution: Top vs. Bottom
# Days-on-market densities per infection period, one facet row per
# combination of the top-25% / bottom-25% days-on-market indicators.
ggplot(data_factor) +
  geom_density(
    mapping = aes(x = dom, fill = infections_period),
    alpha = 0.5,
    position = "identity"
  ) +
  facet_grid(rows = vars(top25_dom, bottom25_dom), scales = "free") +
  labs(
    title = "Days on Market Distributions",
    x = "Days on Market",
    y = "Density"
  ) +
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  )
# dom on Infections
# Violin plus boxplot of days on market for each infection period.
ggplot(data_factor, aes(x = infections_period, y = dom, fill = infections_period)) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  # Only the manual fill scale is kept; the earlier scale_fill_viridis() was
  # replaced by it anyway and only triggered a "Scale for 'fill'" message.
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  ) +
  # coord_flip() +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  ) +
  ggtitle("Comparison of Days on Market") +
  xlab("Infection Period") +
  ylab("Days on Market")
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
# Coefficient tests with heteroskedasticity-consistent (White, HC0) standard errors.
# `method = "White2"` was dropped: it is not a vcovHC() argument for lm fits and
# was silently ignored, so the estimator is unchanged.
coeftest(lm_corona_dom_bottom, vcov = vcovHC(lm_corona_dom_bottom, type = "HC0"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.8708e+05 3.2369e+04 5.7796 7.578e-09 ***
ac_typenone -4.3502e+04 1.9826e+03 -21.9419 < 2.2e-16 ***
ac_typenot_central -1.3181e+04 1.5901e+03 -8.2895 < 2.2e-16 ***
patio1 7.7980e+03 7.7727e+02 10.0326 < 2.2e-16 ***
school_general1 1.0761e+04 1.0337e+03 10.4104 < 2.2e-16 ***
photo_count 9.9320e+02 4.9250e+01 20.1664 < 2.2e-16 ***
pool1 1.0726e+04 1.3912e+03 7.7098 1.307e-14 ***
roof_typeother 2.7998e+03 1.4391e+03 1.9455 0.0517306 .
roof_typeshingle 1.9954e+04 1.6443e+03 12.1353 < 2.2e-16 ***
roof_typeslate 1.0039e+04 9.9523e+03 1.0087 0.3131423
gas_typenatural -9.0369e+04 3.6267e+03 -24.9175 < 2.2e-16 ***
gas_typenone -1.2972e+05 2.5068e+03 -51.7475 < 2.2e-16 ***
gas_typepropane -1.0043e+05 1.7888e+04 -5.6147 1.991e-08 ***
gas_typeunknown -1.3365e+05 2.4104e+03 -55.4476 < 2.2e-16 ***
out_building1 -5.1575e+03 8.2715e+02 -6.2352 4.585e-10 ***
area_living 3.3177e+01 6.1916e+00 5.3584 8.472e-08 ***
land_acres 3.3123e+03 9.5344e+02 3.4740 0.0005136 ***
appliances1 2.4473e+04 1.1320e+03 21.6202 < 2.2e-16 ***
garage1 1.2334e+04 7.6942e+02 16.0305 < 2.2e-16 ***
property_conditionnew -2.3336e+04 6.5133e+03 -3.5829 0.0003405 ***
property_conditionother -2.0485e+04 9.4800e+02 -21.6090 < 2.2e-16 ***
energy_efficient1 1.4336e+04 8.4221e+02 17.0216 < 2.2e-16 ***
exterior_typemetal -9.4351e+01 2.3687e+03 -0.0398 0.9682277
exterior_typeother 1.2027e+04 1.0753e+03 11.1843 < 2.2e-16 ***
exterior_typevinyl 5.5264e+03 1.1123e+03 4.9682 6.802e-07 ***
exterior_typewood 3.7714e+03 1.7809e+03 2.1177 0.0342126 *
exterior_featurescourtyard 4.0526e+04 1.4388e+04 2.8167 0.0048552 **
exterior_featuresfence -2.2187e+04 5.4551e+03 -4.0673 4.770e-05 ***
exterior_featuresnone -1.6005e+04 5.4667e+03 -2.9277 0.0034174 **
exterior_featuresporch -2.3004e+04 5.5210e+03 -4.1666 3.103e-05 ***
exterior_featurestennis_court 7.5932e+03 1.0821e+04 0.7017 0.4828551
fireplace1 1.1940e+04 8.3389e+02 14.3183 < 2.2e-16 ***
foundation_typeslab 1.3321e+04 1.2877e+03 10.3450 < 2.2e-16 ***
foundation_typeunspecified 7.8067e+03 1.4283e+03 5.4656 4.658e-08 ***
beds_total1 -2.9076e+04 2.6956e+04 -1.0786 0.2807572
beds_total2 -3.4438e+04 2.6858e+04 -1.2822 0.1997713
beds_total3 -3.4970e+04 2.6893e+04 -1.3003 0.1935003
beds_total4 -3.1785e+04 2.6925e+04 -1.1805 0.2378043
beds_total5 -4.9896e+04 2.7337e+04 -1.8252 0.0679782 .
bath_full1 -3.3866e+04 2.3351e+04 -1.4503 0.1469909
bath_full2 -1.1397e+04 2.3337e+04 -0.4884 0.6252799
bath_full3 1.4332e+04 2.3430e+04 0.6117 0.5407548
bath_full4 1.7551e+04 2.9478e+04 0.5954 0.5515762
bath_full6 -1.5768e+04 2.4025e+04 -0.6563 0.5116069
bath_half1 1.2603e+04 1.1311e+03 11.1419 < 2.2e-16 ***
bath_half2 3.7982e+04 7.7802e+03 4.8819 1.057e-06 ***
bath_half3 5.7772e+04 1.2170e+04 4.7472 2.074e-06 ***
bath_half4 8.2835e+04 3.2808e+03 25.2484 < 2.2e-16 ***
bath_half5 -5.7385e+04 2.7280e+04 -2.1036 0.0354271 *
age -2.0364e+03 8.3985e+01 -24.2475 < 2.2e-16 ***
sold_date -6.4800e-02 4.7932e-01 -0.1352 0.8924613
sewer_typeseptic -5.7769e+03 1.4590e+03 -3.9596 7.529e-05 ***
sewer_typeunspecified -4.6494e+03 7.5422e+02 -6.1646 7.177e-10 ***
property_stylenot_mobile 6.7806e+04 1.7712e+03 38.2830 < 2.2e-16 ***
subdivision1 3.5519e+03 9.1933e+02 3.8636 0.0001120 ***
water_typewell 1.5226e+03 4.0416e+03 0.3767 0.7063762
waterfront1 2.0260e+04 1.5043e+03 13.4680 < 2.2e-16 ***
age_2 1.8678e+01 1.1788e+00 15.8454 < 2.2e-16 ***
area_living_2 8.9998e-03 1.7761e-03 5.0672 4.067e-07 ***
data_factor$infections_3mma 1.0360e+01 7.4316e-01 13.9409 < 2.2e-16 ***
bottom25_dom 1.4593e+04 1.0153e+03 14.3736 < 2.2e-16 ***
data_factor$infections_3mma:bottom25_dom -2.1908e+00 9.2971e-01 -2.3564 0.0184597 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Conditional Mean: mean sold price per infection period, city-limit properties only
library(plyr)
city_limits_mean_data <- ddply(
  subset(data_factor, city_limits == 1),
  "infections_period", summarise,
  city_limits_mean = mean(sold_price, na.rm = TRUE)
)

# Distribution: Just City
ggplot(data = subset(data_factor, city_limits == 1), aes(x = sold_price)) +
  geom_density(alpha = 0.5, position = "identity", fill = very_low) +
  ggtitle("Price Distribution of Properties in City Limits") +
  # The reference line marks the mean sold price of the plotted subset. The
  # old code took mean(city_limits), which is NA for a non-numeric column, so
  # every row was dropped with a warning and no line was drawn.
  geom_vline(
    xintercept = mean(
      subset(data_factor, city_limits == 1)$sold_price,
      na.rm = TRUE
    ),
    linetype = "dashed", size = 0.4, alpha = 0.5
  ) +
  xlab("Sold Price") +
  ylab("Density")
Warning in mean.default(city_limits) :
argument is not numeric or logical: returning NA
Warning: Removed 23399 rows containing missing values (geom_vline).
# Distribution: Infection
# Price density of city-limit properties per infection period, with a dashed
# line at each period's mean. Rows of city_limits_mean_data are addressed by
# position, as before.
# NOTE(review): assumes row 1 is the "Pre" period and row 2 the "Post" period - confirm.
ggplot(
  data = subset(data_factor, city_limits == 1),
  mapping = aes(x = sold_price, fill = infections_period)
) +
  geom_density(alpha = 0.5, position = "identity") +
  ggtitle("Price Distributions of Properties in City Limits") +
  # Constants are passed as parameters: inside aes() with the 2-row summary
  # frame, each line was drawn twice and its alpha compounded.
  geom_vline(
    xintercept = city_limits_mean_data[2, 2],
    linetype = "dashed", size = 0.5, color = med, alpha = 0.8
  ) +
  geom_vline(
    xintercept = city_limits_mean_data[1, 2],
    linetype = "dashed", size = 0.5, alpha = 0.8, color = very_low
  ) +
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  ) +
  xlab("Sold Price") +
  ylab("Density")
# city_limits on Infections
# Violin plus boxplot of sold price by city-limits flag, filled by infection period.
ggplot(data_factor, aes(x = city_limits, y = sold_price, fill = infections_period)) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1, alpha = 0.9) +
  # Only the manual fill scale is kept; the earlier scale_fill_viridis() was
  # replaced by it anyway and only triggered a "Scale for 'fill'" message.
  # The fill encodes the infection period, so the scale is named accordingly
  # (it was mislabelled "City Limits").
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  ) +
  # coord_flip() +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 14)
  ) +
  # Title typo fixed ("Limts" -> "Limits").
  ggtitle("Comparison of Price: City Limits and Pre vs. Post Corona") +
  xlab("City Limits and Infection Period") +
  ylab("Sold Price")
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
# Testing Corona, City Limits
# OLS of sold_price on every column of data_factor_core_clean (the `.`)
# plus infections_3mma, city_limits and their interaction.
# NOTE(review): the three test terms are read from data_factor via `$`
# rather than from the `data` argument, so this assumes data_factor and
# data_factor_core_clean hold the same rows in the same order -- confirm.
# NOTE(review): `a*b` in a formula already expands to a + b + a:b, so the
# separately listed main effects look redundant.
lm_corona_city <- lm(sold_price ~ .
# test variable(s)
+ data_factor$infections_3mma + data_factor$city_limits
+ data_factor$infections_3mma*data_factor$city_limits
,data = data_factor_core_clean)
# Coefficient tests using a heteroskedasticity-consistent (HC0) covariance.
# NOTE(review): check that vcovHC() actually uses the `method` argument.
coeftest(lm_corona_city, vcov = vcovHC(lm_corona_city, method = "White2", type = "HC0"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.7861e+05 3.3976e+04 5.2570 1.477e-07 ***
property_typeDUP -5.2256e+04 1.5467e+04 -3.3785 0.0007300 ***
property_typeOTH 2.5209e+04 1.5054e+04 1.6746 0.0940318 .
property_typePAT 1.6422e+04 5.5954e+03 2.9350 0.0033385 **
property_typeSGL 2.2707e+04 2.7116e+03 8.3742 < 2.2e-16 ***
property_typeTNH -3.4785e+03 3.3351e+03 -1.0430 0.2969503
ac_typenone -4.5713e+04 1.9689e+03 -23.2178 < 2.2e-16 ***
ac_typenot_central -1.3706e+04 1.5987e+03 -8.5732 < 2.2e-16 ***
patio1 8.1712e+03 7.7788e+02 10.5044 < 2.2e-16 ***
school_general1 1.1846e+04 1.0460e+03 11.3259 < 2.2e-16 ***
photo_count 9.1824e+02 4.8848e+01 18.7978 < 2.2e-16 ***
pool1 1.3109e+04 1.3993e+03 9.3684 < 2.2e-16 ***
roof_typeother 3.6223e+03 1.4471e+03 2.5032 0.0123145 *
roof_typeshingle 2.1298e+04 1.6497e+03 12.9106 < 2.2e-16 ***
roof_typeslate 1.0056e+04 9.8591e+03 1.0200 0.3077618
gas_typenatural -8.9629e+04 3.6449e+03 -24.5899 < 2.2e-16 ***
gas_typenone -1.3151e+05 2.4644e+03 -53.3656 < 2.2e-16 ***
gas_typepropane -9.9907e+04 1.8268e+04 -5.4690 4.571e-08 ***
gas_typeunknown -1.3692e+05 2.3453e+03 -58.3818 < 2.2e-16 ***
out_building1 -6.1045e+03 8.2702e+02 -7.3814 1.616e-13 ***
area_living 3.2060e+01 6.1705e+00 5.1957 2.056e-07 ***
land_acres 2.0637e+03 9.4585e+02 2.1819 0.0291294 *
appliances1 2.4475e+04 1.1334e+03 21.5939 < 2.2e-16 ***
garage1 1.2014e+04 7.7206e+02 15.5615 < 2.2e-16 ***
property_conditionnew -2.1188e+04 6.2676e+03 -3.3805 0.0007246 ***
property_conditionother -2.1335e+04 9.5483e+02 -22.3443 < 2.2e-16 ***
energy_efficient1 1.3986e+04 8.4013e+02 16.6469 < 2.2e-16 ***
exterior_typemetal -7.3384e+01 2.3631e+03 -0.0311 0.9752273
exterior_typeother 1.1645e+04 1.0751e+03 10.8307 < 2.2e-16 ***
exterior_typevinyl 5.0111e+03 1.1136e+03 4.5001 6.823e-06 ***
exterior_typewood 3.7778e+03 1.7816e+03 2.1205 0.0339778 *
exterior_featurescourtyard 3.3821e+04 1.4091e+04 2.4002 0.0163944 *
exterior_featuresfence -3.1962e+04 5.3284e+03 -5.9984 2.021e-09 ***
exterior_featuresnone -2.4953e+04 5.3355e+03 -4.6769 2.928e-06 ***
exterior_featuresporch -3.2028e+04 5.3922e+03 -5.9396 2.895e-09 ***
exterior_featurestennis_court -5.6576e+02 1.0551e+04 -0.0536 0.9572380
fireplace1 1.1828e+04 8.3361e+02 14.1887 < 2.2e-16 ***
foundation_typeslab 1.4938e+04 1.2903e+03 11.5773 < 2.2e-16 ***
foundation_typeunspecified 8.3762e+03 1.4287e+03 5.8630 4.604e-09 ***
beds_total1 -3.0336e+04 2.5401e+04 -1.1943 0.2323774
beds_total2 -3.8930e+04 2.5313e+04 -1.5379 0.1240784
beds_total3 -4.5128e+04 2.5374e+04 -1.7785 0.0753342 .
beds_total4 -4.2724e+04 2.5412e+04 -1.6812 0.0927301 .
beds_total5 -6.0622e+04 2.5853e+04 -2.3449 0.0190400 *
bath_full1 -3.2997e+04 2.5077e+04 -1.3158 0.1882552
bath_full2 -8.1502e+03 2.5069e+04 -0.3251 0.7450976
bath_full3 1.8659e+04 2.5159e+04 0.7416 0.4583086
bath_full4 2.1358e+04 3.1105e+04 0.6866 0.4923183
bath_full6 1.9232e+04 2.5880e+04 0.7431 0.4574071
bath_half1 1.4021e+04 1.1345e+03 12.3586 < 2.2e-16 ***
bath_half2 3.8677e+04 7.9272e+03 4.8790 1.073e-06 ***
bath_half3 5.8459e+04 1.0835e+04 5.3952 6.909e-08 ***
bath_half4 7.1968e+04 3.2187e+03 22.3594 < 2.2e-16 ***
bath_half5 -6.1887e+04 2.7837e+04 -2.2232 0.0262144 *
age -2.0199e+03 8.4330e+01 -23.9525 < 2.2e-16 ***
dom -6.2165e+01 5.7948e+00 -10.7278 < 2.2e-16 ***
sold_date 3.8776e-01 4.7523e-01 0.8159 0.4145389
sewer_typeseptic -5.7389e+03 1.4748e+03 -3.8912 1.000e-04 ***
sewer_typeunspecified -4.6601e+03 7.5909e+02 -6.1391 8.424e-10 ***
property_stylenot_mobile 6.8636e+04 1.7654e+03 38.8784 < 2.2e-16 ***
subdivision1 3.6139e+03 9.1778e+02 3.9376 8.252e-05 ***
water_typewell 5.8505e+03 4.1916e+03 1.3958 0.1627978
waterfront1 2.0355e+04 1.5069e+03 13.5081 < 2.2e-16 ***
age_2 1.8234e+01 1.1843e+00 15.3960 < 2.2e-16 ***
area_living_2 9.0448e-03 1.7690e-03 5.1129 3.197e-07 ***
data_factor$infections_3mma 5.1147e+00 1.6642e+00 3.0733 0.0021194 **
data_factor$city_limits1 7.2944e+03 2.2063e+03 3.3062 0.0009470 ***
data_factor$infections_3mma:data_factor$city_limits1 4.9912e+00 1.6744e+00 2.9809 0.0028764 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Load the hard-keyed housing index workbook and attach it so its columns
# can be referenced by bare name.
# NOTE(review): every run of attach() puts another copy on the search path
# (the "masked from data_index" messages show several already); referencing
# data_index$<column> explicitly would avoid reading a stale copy.
data_index <- read_excel("/Users/sawyerbenson/Documents/Master Thesis/Thesis_Github/Models/Data/New Data/Index_hardkey.xlsx")
attach(data_index)
The following objects are masked from data_index (pos = 6):
Date, lma_2m, lma_2m_index, lma_3m, lma_3m_index, lma_4m, lma_4m_index, lma_5m,
lma_5m_index, log_wappsf, ma_2m, ma_3m, ma_4m, ma_5m, wappsf
The following objects are masked from data_index (pos = 10):
Date, lma_2m, lma_2m_index, lma_3m, lma_3m_index, lma_4m, lma_4m_index, lma_5m,
lma_5m_index, log_wappsf, ma_2m, ma_3m, ma_4m, ma_5m, wappsf
The following objects are masked from data_index (pos = 14):
Date, lma_2m, lma_2m_index, lma_3m, lma_3m_index, lma_4m, lma_4m_index, lma_5m,
lma_5m_index, log_wappsf, ma_2m, ma_3m, ma_4m, ma_5m, wappsf
The following objects are masked from data_index (pos = 17):
Date, lma_2m, lma_2m_index, lma_3m, lma_3m_index, lma_4m, lma_4m_index, lma_5m,
lma_5m_index, log_wappsf, ma_2m, ma_3m, ma_4m, ma_5m, wappsf
The following objects are masked from data_index (pos = 23):
Date, lma_2m, lma_2m_index, lma_3m, lma_3m_index, lma_4m, lma_4m_index, lma_5m,
lma_5m_index, log_wappsf, ma_2m, ma_3m, ma_4m, ma_5m, wappsf
The following objects are masked from data_index (pos = 27):
Date, lma_2m, lma_2m_index, lma_3m, lma_3m_index, lma_4m, lma_4m_index, lma_5m,
lma_5m_index, log_wappsf, ma_2m, ma_3m, ma_4m, ma_5m, wappsf
The following objects are masked from data_index (pos = 31):
Date, lma_2m, lma_2m_index, lma_3m, lma_3m_index, lma_4m, lma_4m_index, lma_5m,
lma_5m_index, log_wappsf, ma_2m, ma_3m, ma_4m, ma_5m, wappsf
The following objects are masked from data_index (pos = 35):
Date, lma_2m, lma_2m_index, lma_3m, lma_3m_index, lma_4m, lma_4m_index, lma_5m,
lma_5m_index, log_wappsf, ma_2m, ma_3m, ma_4m, ma_5m, wappsf
The following objects are masked from data_index (pos = 39):
Date, lma_2m, lma_2m_index, lma_3m, lma_3m_index, lma_4m, lma_4m_index, lma_5m,
lma_5m_index, log_wappsf, ma_2m, ma_3m, ma_4m, ma_5m, wappsf
# Load the FRED index workbook and attach it so its columns can be
# referenced by bare name.
# NOTE(review): `date` is also a column of the other attached index tables,
# so a bare `date` resolves to whichever table was attached last.
data_index_fred <- read_excel("/Users/sawyerbenson/Documents/Master Thesis/Thesis_Github/Models/Data/New Data/Index_FRED.xls")
attach(data_index_fred)
The following object is masked from data_index_fred_1975_total (pos = 4):
date
The following object is masked from data_index_gdp (pos = 5):
date
The following objects are masked from data_index_fred (pos = 6):
date, index_Q1_1980
The following object is masked from data_index_fred_1975_total (pos = 8):
date
The following object is masked from data_index_gdp (pos = 9):
date
The following objects are masked from data_index_fred (pos = 10):
date, index_Q1_1980
The following object is masked from data_index_fred_1975_total (pos = 12):
date
The following object is masked from data_index_gdp (pos = 13):
date
The following objects are masked from data_index_fred (pos = 14):
date, index_Q1_1980
The following object is masked from data_index_gdp (pos = 16):
date
The following objects are masked from data_index_fred (pos = 17):
date, index_Q1_1980
The following object is masked from data_index_fred_1975_total (pos = 19):
date
The following object is masked from data_index_fred_1975_total (pos = 20):
date
The following object is masked from data_index_gdp (pos = 22):
date
The following objects are masked from data_index_fred (pos = 23):
date, index_Q1_1980
The following object is masked from data_index_gdp (pos = 26):
date
The following objects are masked from data_index_fred (pos = 27):
date, index_Q1_1980
The following object is masked from data_index_gdp (pos = 30):
date
The following objects are masked from data_index_fred (pos = 31):
date, index_Q1_1980
The following object is masked from data_index_gdp (pos = 34):
date
The following objects are masked from data_index_fred (pos = 35):
date, index_Q1_1980
The following object is masked from data_index_gdp (pos = 38):
date
The following objects are masked from data_index_fred (pos = 39):
date, index_Q1_1980
# Load the Louisiana GDP workbook and attach it so its columns can be
# referenced by bare name.
# NOTE(review): the attached copy is a snapshot; later reassignments of
# data_index_gdp do not change what the bare column names resolve to.
data_index_gdp <- read_excel("/Users/sawyerbenson/Documents/Master Thesis/Thesis_Github/Models/Data/New Data/la_GDP.xls")
attach(data_index_gdp)
The following object is masked from data_index_fred (pos = 3):
date
The following object is masked from data_index_fred_1975_total (pos = 5):
date
The following objects are masked from data_index_gdp (pos = 6):
date, real_gdp, real_gdp_Index, real_gdp_re_specific, real_gdp_re_specific_index
The following object is masked from data_index_fred (pos = 7):
date
The following object is masked from data_index_fred_1975_total (pos = 9):
date
The following objects are masked from data_index_gdp (pos = 10):
date, real_gdp, real_gdp_Index, real_gdp_re_specific, real_gdp_re_specific_index
The following object is masked from data_index_fred (pos = 11):
date
The following object is masked from data_index_fred_1975_total (pos = 13):
date
The following objects are masked from data_index_gdp (pos = 14):
date, real_gdp, real_gdp_Index, real_gdp_re_specific, real_gdp_re_specific_index
The following object is masked from data_index_fred (pos = 15):
date
The following objects are masked from data_index_gdp (pos = 17):
date, real_gdp, real_gdp_Index, real_gdp_re_specific, real_gdp_re_specific_index
The following object is masked from data_index_fred (pos = 18):
date
The following object is masked from data_index_fred_1975_total (pos = 20):
date
The following object is masked from data_index_fred_1975_total (pos = 21):
date
The following objects are masked from data_index_gdp (pos = 23):
date, real_gdp, real_gdp_Index, real_gdp_re_specific, real_gdp_re_specific_index
The following object is masked from data_index_fred (pos = 24):
date
The following objects are masked from data_index_gdp (pos = 27):
date, real_gdp, real_gdp_Index, real_gdp_re_specific, real_gdp_re_specific_index
The following object is masked from data_index_fred (pos = 28):
date
The following objects are masked from data_index_gdp (pos = 31):
date, real_gdp, real_gdp_Index, real_gdp_re_specific, real_gdp_re_specific_index
The following object is masked from data_index_fred (pos = 32):
date
The following objects are masked from data_index_gdp (pos = 35):
date, real_gdp, real_gdp_Index, real_gdp_re_specific, real_gdp_re_specific_index
The following object is masked from data_index_fred (pos = 36):
date
The following objects are masked from data_index_gdp (pos = 39):
date, real_gdp, real_gdp_Index, real_gdp_re_specific, real_gdp_re_specific_index
The following object is masked from data_index_fred (pos = 40):
date
# Load the total-US 1975-based index workbook and attach it so its columns
# can be referenced by bare name.
# NOTE(review): repeated attach() calls stack copies on the search path (see
# the masking messages); prefer data_index_fred_1975_total$<column>.
data_index_fred_1975_total <- read_excel("/Users/sawyerbenson/Documents/Master Thesis/Thesis_Github/Models/Data/New Data/Total_US_1975.xls")
attach(data_index_fred_1975_total)
The following object is masked from data_index_gdp (pos = 3):
date
The following object is masked from data_index_fred (pos = 4):
date
The following objects are masked from data_index_fred_1975_total (pos = 6):
date, gdp_pc_nom_index_1975, gdp_pc_real_index_1975, re_cpi, re_nom_index_1975,
re_real_index_1975
The following object is masked from data_index_gdp (pos = 7):
date
The following object is masked from data_index_fred (pos = 8):
date
The following objects are masked from data_index_fred_1975_total (pos = 10):
date, gdp_pc_nom_index_1975, gdp_pc_real_index_1975, re_cpi, re_nom_index_1975,
re_real_index_1975
The following object is masked from data_index_gdp (pos = 11):
date
The following object is masked from data_index_fred (pos = 12):
date
The following objects are masked from data_index_fred_1975_total (pos = 14):
date, gdp_pc_nom_index_1975, gdp_pc_real_index_1975, re_cpi, re_nom_index_1975,
re_real_index_1975
The following object is masked from data_index_gdp (pos = 15):
date
The following object is masked from data_index_fred (pos = 16):
date
The following object is masked from data_index_gdp (pos = 18):
date
The following object is masked from data_index_fred (pos = 19):
date
The following objects are masked from data_index_fred_1975_total (pos = 21):
date, gdp_pc_nom_index_1975, gdp_pc_real_index_1975, re_cpi, re_nom_index_1975,
re_real_index_1975
The following object is masked from data_index_fred_1975_total (pos = 22):
date
The following object is masked from data_index_gdp (pos = 24):
date
The following object is masked from data_index_fred (pos = 25):
date
The following object is masked from data_index_gdp (pos = 28):
date
The following object is masked from data_index_fred (pos = 29):
date
The following object is masked from data_index_gdp (pos = 32):
date
The following object is masked from data_index_fred (pos = 33):
date
The following object is masked from data_index_gdp (pos = 36):
date
The following object is masked from data_index_fred (pos = 37):
date
The following object is masked from data_index_gdp (pos = 40):
date
The following object is masked from data_index_fred (pos = 41):
date
# Index graphing
# Louisiana housing index: the 2-, 3-, 4- and 5-month series from
# data_index plotted against Date, with a vertical marker at 2020-03-23.
ggplot(data_index, aes(x = Date)) +
  geom_line(mapping = aes(y = lma_2m_index), color = "darkred") +
  geom_line(mapping = aes(y = lma_3m_index), color = "darkgreen") +
  geom_line(mapping = aes(y = lma_4m_index), color = "darkblue") +
  geom_line(mapping = aes(y = lma_5m_index), color = "grey45") +
  # NOTE(review): this intercept is expressed in days since 1970-01-01; it
  # only lands on the right spot if `Date` is a Date column (a POSIXct axis
  # is measured in seconds) -- confirm the column type.
  geom_vline(xintercept = as.numeric(as.Date("2020-03-23")), linetype = 4, color = "green") +
  #scale_x_date(limits = as.Date(c("2020-01-01", "2021-12-31"))) +
  # The limits are read from the data frame itself rather than from an
  # attach()ed copy, and missing values are ignored so they stay finite.
  scale_y_continuous(limits = range(data_index$lma_2m_index, na.rm = TRUE)) +
  xlab(" ") +
  ylab("Weighted Average Price per Sqft.") +
  labs(
    title = "Louisiana Housing Index",
    caption = ""
  )
# FRED quarterly data
# Louisiana housing index (index_Q1_1980) from data_index_fred over time,
# with a vertical marker at 2020-01-01.
ggplot(data_index_fred, aes(x = date)) +
  geom_line(aes(y = index_Q1_1980), color = "darkred") +
  theme_minimal() +
  geom_vline(xintercept = as.Date("2020-01-01"), linetype = 4, color = "green") +
  #scale_x_date(limits = as.Date(c("2020-01-01", "2021-12-31"))) +
  # The limits are read from the data frame itself rather than from an
  # attach()ed copy, and missing values are ignored so they stay finite.
  scale_y_continuous(limits = range(data_index_fred$index_Q1_1980, na.rm = TRUE)) +
  xlab(" ") +
  ylab("Index Value") +
  labs(
    title = "Louisiana Housing Index: FRED St. Louis",
    caption = ""
  )
# La Real GDP data quarterly data
# Keep observations from 2011-07-01 onward, then plot the aggregate GDP
# index (dashed) against the real-estate-specific GDP index.
# NOTE(review): confirm `date` is a Date column; comparing a POSIXct column
# with as.Date() does not filter as intended.
data_index_gdp <- subset(data_index_gdp, data_index_gdp$date >= as.Date("2011-07-01"))
ggplot(data_index_gdp, aes(x = date)) +
  # Inside aes(), colour values are category labels, not colours. Each
  # series is therefore mapped to its legend label and the real colours are
  # assigned in scale_color_manual() below, so labels can no longer be
  # attached to the wrong line by alphabetical ordering.
  geom_line(aes(y = real_gdp_Index, color = "Aggregate GDP Index"), linetype = "dashed", size = .5) +
  geom_line(aes(y = real_gdp_re_specific_index, color = "RE Index"), size = .5) +
  theme(legend.position = "bottom") +
  geom_vline(xintercept = as.Date("2020-01-01"), linetype = 4, color = "green") +
  #scale_x_date(limits = as.Date(c("2020-01-01", "2021-12-31"))) +
  #scale_y_continuous(limits = c(min(real_gdp_Index),max(real_gdp_Index))) +
  xlab(" ") +
  ylab("Index Value") +
  labs(
    title = "Louisiana GDP and Housing Index: FRED St. Louis",
    caption = ""
  ) +
  # The legend title was a copy-pasted "Infection Period"; the two keys are
  # index series, so the title is left blank.
  scale_color_manual(
    name = "",
    values = c("RE Index" = "darkblue", "Aggregate GDP Index" = "darkred"),
    breaks = c("RE Index", "Aggregate GDP Index")
  )
# NOTE(review): these bare names resolve to the attach()ed snapshot of
# data_index_gdp, i.e. the rows as loaded, not the subset taken above.
# Decide which sample the correlation should describe.
cor.test(real_gdp_Index, real_gdp_re_specific_index)
Pearson's product-moment correlation
data: real_gdp_Index and real_gdp_re_specific_index
t = -2.0521, df = 65, p-value = 0.04419
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.459638677 -0.006861981
sample estimates:
cor
-0.246664
# TOTAL US Real GDP data quarterly data base 2011
# US GDP (dashed) against the all-transactions real-estate index.
# NOTE(review): data_index_fred_total is not created in the visible part of
# the script -- confirm it is loaded before this chunk runs.
ggplot(data_index_fred_total, aes(x = observation_date)) +
  # Each series is mapped to its legend label; the very_low / med colours
  # are applied in scale_color_manual(). Mapping the colour values
  # themselves inside aes() used them only as category names, so the default
  # palette was drawn and the labels depended on their sort order.
  geom_line(aes(y = GDP, color = "Aggregate GDP Index"), linetype = "dashed", size = .5) +
  geom_line(aes(y = all_re_index, color = "RE Index"), size = .5) +
  theme(legend.position = "bottom") +
  geom_vline(xintercept = as.Date("2020-01-01"), linetype = 4, color = "green") +
  #scale_x_date(limits = as.Date(c("2020-01-01", "2021-12-31"))) +
  #scale_y_continuous(limits = c(min(real_gdp_Index),max(real_gdp_Index))) +
  xlab(" ") +
  ylab("Index Value") +
  labs(
    title = "Total US GDP and Housing Index: FRED St. Louis",
    caption = ""
  ) +
  # The legend title was a copy-pasted "Infection Period"; left blank.
  scale_color_manual(
    name = "",
    values = c("RE Index" = med, "Aggregate GDP Index" = very_low),
    breaks = c("RE Index", "Aggregate GDP Index")
  )
# TOTAL US Real GDP data quarterly data base 1975
# Nominal
# Nominal GDP per capita (dashed) against nominal housing prices, followed
# by their Pearson correlation.
ggplot(data_index_fred_1975_total, aes(x = date)) +
  # Series are mapped to their legend labels and coloured explicitly below,
  # so the label/line pairing no longer depends on how the colour strings
  # happen to sort.
  geom_line(aes(y = gdp_pc_nom_index_1975, color = "Nominal GDP Per-Capita"), linetype = "dashed", size = .5) +
  geom_line(aes(y = re_nom_index_1975, color = "Nominal Housing Prices"), size = .5) +
  theme(legend.position = "bottom") +
  #geom_vline(xintercept = as.Date("2020-01-01"), linetype=4, color = "green") +
  #scale_x_date(limits = as.Date(c("2020-01-01", "2021-12-31"))) +
  #scale_y_continuous(limits = c(min(real_gdp_Index),max(real_gdp_Index))) +
  xlab(" ") +
  ylab("Index Value (1975 Q1 = 100)") +
  labs(
    title = "U.S. GDP and Housing Index",
    caption = "FRED, St. Louis"
  ) +
  scale_color_manual(
    name = "",
    values = c(
      "Nominal Housing Prices" = med,
      "Nominal GDP Per-Capita" = very_low
    ),
    breaks = c("Nominal Housing Prices", "Nominal GDP Per-Capita")
  )
# Columns are read from the data frame, not from an attach()ed copy.
corr_nom_1975 <- with(
  data_index_fred_1975_total,
  cor(gdp_pc_nom_index_1975, re_nom_index_1975)
)
# Real
# Real GDP per capita (dashed) against real housing prices, followed by
# their Pearson correlation.
ggplot(data_index_fred_1975_total, aes(x = date)) +
  # Series are mapped to their legend labels and coloured explicitly below,
  # so the label/line pairing no longer depends on how the colour strings
  # happen to sort.
  geom_line(aes(y = gdp_pc_real_index_1975, color = "Real GDP Per-Capita"), linetype = "dashed", size = .5) +
  geom_line(aes(y = re_real_index_1975, color = "Real Housing Prices"), size = .5) +
  theme(legend.position = "bottom") +
  #geom_vline(xintercept = as.Date("2020-01-01"), linetype=4, color = "green") +
  #scale_x_date(limits = as.Date(c("2020-01-01", "2021-12-31"))) +
  #scale_y_continuous(limits = c(min(real_gdp_Index),max(real_gdp_Index))) +
  xlab(" ") +
  ylab("Index Value (1975 Q1 = 100)") +
  labs(
    title = "U.S. GDP and Housing Index",
    caption = "FRED, St. Louis"
  ) +
  scale_color_manual(
    name = "",
    values = c(
      "Real Housing Prices" = med,
      "Real GDP Per-Capita" = very_low
    ),
    breaks = c("Real Housing Prices", "Real GDP Per-Capita")
  )
# Columns are read from the data frame, not from an attach()ed copy.
corr_real_1975 <- with(
  data_index_fred_1975_total,
  cor(gdp_pc_real_index_1975, re_real_index_1975)
)
corr_nom_1975
[1] 0.9827026
corr_real_1975
[1] 0.6235355
# packages
# library() stops with an error when a package is missing, whereas
# require() only returns FALSE and lets the script fail later.
library(ggplot2)
library(maps)
# ggmap is installed on demand and then attached; the original installed it
# on every run but never loaded it.
if (!requireNamespace("ggmap", quietly = TRUE)) {
  install.packages("ggmap")
}
library(ggmap)
# NOTE(review): the original also ran install.packages(Geoc). `Geoc` is an
# undefined object, not a quoted package name, so that line stopped the
# script. The intended package could not be determined and the call was
# dropped; add it back, quoted, if something else is required.
#Basic Map
# Outline of Louisiana from the maps "state" database.
LA <- map_data("state", region = "louisiana")
ggplot(LA, aes(x = long, y = lat)) +
  geom_polygon()
# data
# Example call counts for five Louisiana cities.
salesCalls <- data.frame(
  State = rep("louisiana", 5),
  City = c("Baton Rouge", "New Orleans", "Shreveport", "Lafayette", "Mandeville"),
  Calls = c(10, 5, 8, 13, 2)
)
# geocode() is called through its namespace so this chunk does not depend
# on ggmap being attached; its lon/lat columns are placed in front.
# NOTE(review): geocoding goes through an external service and may need API
# credentials to be registered first -- confirm for the installed ggmap.
salesCalls <- cbind(ggmap::geocode(as.character(salesCalls$City)), salesCalls)
# (An interactive `?cbind` help lookup was removed from the script.)
ggplot(LA, aes(x = long, y = lat)) +
  geom_polygon() +
  coord_map() +
  geom_point(
    data = salesCalls,
    aes(x = lon, y = lat, size = Calls),
    color = "orange"
  )
library(boot) # K-fold
library(leaps) # Subset
library(glmnet) # glmnet() needs an x matrix as well as a y vector
# Set x-y definitions for glmnet package
# x and y are built from one model frame. model.matrix() drops rows with
# missing values, so taking the first n entries of sold_price (as the
# original did) pairs predictors with the wrong responses as soon as a
# dropped row is not at the end of the data.
lasso_frame <- model.frame(sold_price ~ ., data = data_factor_core_clean)
x <- model.matrix(attr(lasso_frame, "terms"), lasso_frame)[, -1]
y <- model.response(lasso_frame)
# General grid
# 101 lambda values, log-spaced from exp(10) (heavy shrinkage) down to
# exp(-65) (effectively unpenalised least squares).
grid <- exp(seq(10, -65, length.out = 101))
#Lasso
set.seed(1)
cv.out <- cv.glmnet(x, y, alpha = 1, lambda = grid, nfolds = 10) #lasso
plot(cv.out)
# Base decision: lambda with the smallest cross-validated error
bestlam <- cv.out$lambda.min
bestlam
log(bestlam)
out <- cv.out$glmnet.fit
lasso.coef <- predict(out, type = "coefficients", s = bestlam)
lasso.coef
lasso.coef[lasso.coef != 0]
# L1 norm of the slope coefficients. The intercept (first entry) is left
# out in both places and every slope is included, instead of the hard-coded
# 1:31 / 2:31 ranges that disagreed with each other.
sum(abs(as.vector(lasso.coef)[-1])) #l1 norm
# +1se decision: largest lambda within one standard error of the minimum
bestlam2 <- cv.out$lambda.1se
bestlam2
log(bestlam2)
lasso.coef2 <- predict(out, type = "coefficients", s = bestlam2)
lasso.coef2
lasso.coef2[lasso.coef2 != 0]
sum(abs(as.vector(lasso.coef2)[-1])) #l1 norm
# 2-D kernel density of sold price against infections_3mma on a 50 x 50
# grid, shown as an interactive surface.
# The columns are looked up in data_factor. The original evaluated them
# inside MASS::geyser, which has neither column, so it only worked when
# matching globals happened to be attached.
kd <- with(data_factor, MASS::kde2d(sold_price, infections_3mma, n = 50))
fig <- plot_ly(x = kd$x, y = kd$y, z = kd$z) %>% add_surface()
fig
# Distribution: Total
# Build one density panel for the summary grid.
#   mapping: an aes() call selecting the x variable
#   x_label: x-axis title
#   data:    data frame to plot (defaults to data_factor)
# Returns a ggplot object with the y-axis text and ticks hidden.
density_panel <- function(mapping, x_label, data = data_factor) {
  ggplot(data, mapping) +
    geom_density(alpha = 0.5, position = "identity", fill = very_low) +
    xlab(x_label) +
    ylab("") +
    theme(
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      text = element_text(size = 10)
    )
}
# Prices are divided by 1000 before plotting.
a <- density_panel(aes(x = sold_price / 1000), "Sold Price")
b <- density_panel(aes(x = list_price / 1000), "List Price")
c <- density_panel(aes(x = area_living), "Living Area")
d <- density_panel(aes(x = land_acres), "Land in Acres")
e <- density_panel(aes(x = area_total), "Total Area")
f <- density_panel(aes(x = age), "Age")
g <- density_panel(aes(x = dom), "DOM")
# Store sold_date as a Date, then print the structure of data_factor.
data_factor[["sold_date"]] <- as.Date(data_factor[["sold_date"]])
str(data_factor)
tibble [24,412 × 49] (S3: tbl_df/tbl/data.frame)
$ mls_number : chr [1:24412] "CNNN5274" "CNNN5241" "CNN104918" "CNN104870" ...
$ property_type : Factor w/ 6 levels "CND","DUP","OTH",..: 5 5 5 5 5 5 5 5 5 5 ...
$ ac_type : Factor w/ 3 levels "central","none",..: 1 3 1 1 1 1 1 1 1 1 ...
$ list_price : num [1:24412] 187000 250000 224900 225000 274900 ...
$ patio : Factor w/ 2 levels "0","1": 1 1 1 2 2 1 2 2 2 2 ...
$ school_general : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ photo_count : num [1:24412] 0 0 0 0 25 2 6 17 17 15 ...
$ pool : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 2 1 ...
$ roof_type : Factor w/ 4 levels "metal","other",..: 3 3 2 3 3 3 2 3 2 2 ...
$ gas_type : Factor w/ 5 levels "butane","natural",..: 5 5 5 5 5 5 5 5 5 5 ...
$ out_building : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 2 1 ...
$ area_living : num [1:24412] 2054 2120 2078 1923 2184 ...
$ land_acres : num [1:24412] 0.28 0.4 0.29 0.36 0.82 0.36 1 1.27 0.63 2.01 ...
$ appliances : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 2 2 2 2 ...
$ garage : Factor w/ 2 levels "0","1": 2 2 1 2 2 2 2 2 2 2 ...
$ property_condition : Factor w/ 3 levels "excellent","new",..: 3 3 3 3 3 3 3 3 3 3 ...
$ energy_efficient : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 2 1 2 ...
$ exterior_type : Factor w/ 5 levels "brick","metal",..: 3 3 4 4 1 3 4 1 3 3 ...
$ exterior_features : Factor w/ 6 levels "balcony","courtyard",..: 4 4 4 5 5 3 4 5 3 3 ...
$ fireplace : Factor w/ 2 levels "0","1": 2 1 1 2 2 2 2 2 2 2 ...
$ foundation_type : Factor w/ 3 levels "raised","slab",..: 1 2 1 2 2 3 2 2 2 2 ...
$ area_total : num [1:24412] 2254 2120 2962 2550 3510 ...
$ beds_total : Factor w/ 6 levels "0","1","2","3",..: 4 5 4 4 4 4 5 4 5 5 ...
$ bath_full : Factor w/ 6 levels "0","1","2","3",..: 3 3 3 3 3 3 3 3 3 3 ...
$ bath_half : Factor w/ 6 levels "0","1","2","3",..: 1 1 1 2 1 1 1 1 2 1 ...
$ age : num [1:24412] 82 9 70 27 7 6 38 32 15 5 ...
$ dom : num [1:24412] 78 83 89 203 231 54 144 108 26 25 ...
$ sold_price : num [1:24412] 169000 245000 230000 220000 272000 ...
$ sold_date : Date[1:24412], format: "2016-02-12" "2016-11-18" "2017-03-03" "2017-06-19" ...
$ sewer_type : Factor w/ 3 levels "city","septic",..: 1 1 1 2 2 1 3 3 1 3 ...
$ property_style : Factor w/ 2 levels "mobile","not_mobile": 2 2 2 2 2 2 2 2 2 2 ...
$ city_limits : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
$ subdivision : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 2 2 2 2 ...
$ water_type : Factor w/ 2 levels "public","well": 1 1 1 1 1 1 1 1 1 1 ...
$ waterfront : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 1 1 ...
$ infections_daily : num [1:24412] 0 0 0 0 0 0 0 0 0 0 ...
$ infections_accum : num [1:24412] 0 0 0 0 0 0 0 0 0 0 ...
$ corona_date_split : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ infections_3mma : num [1:24412] 0 0 0 0 0 0 0 0 0 0 ...
$ top25_sold_price : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 1 2 2 2 ...
$ top50_sold_price : num [1:24412] 0 1 1 1 1 1 1 1 1 1 ...
$ bottom25_sold_price : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ top25_area_living : Factor w/ 2 levels "0","1": 2 2 2 1 2 2 2 2 2 1 ...
$ bottom25_area_living: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ top25_age : Factor w/ 2 levels "0","1": 2 1 2 1 1 1 1 1 1 1 ...
$ bottom25_age : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 1 1 2 2 ...
$ top25_dom : Factor w/ 2 levels "0","1": 1 1 1 2 2 1 2 1 1 1 ...
$ bottom25_dom : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ infections_period : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
# Panel h: density of sale dates, x axis labelled by year only.
h <- ggplot(data_factor, aes(x = sold_date)) +
  geom_density(fill = very_low, alpha = 0.5, position = "identity") +
  scale_x_date(date_labels = "%Y") +
  labs(x = "Sold Date", y = "") +
  theme(
    text = element_text(size = 10),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )
# Panel i: density of daily infections, restricted to rows where
# infections_daily is greater than 1.
i <- ggplot(
  data = subset(data_factor, infections_daily > 1),
  mapping = aes(x = infections_daily)
) +
  geom_density(fill = very_low, alpha = 0.5, position = "identity") +
  labs(x = "Infections Daily", y = "") +
  theme(
    text = element_text(size = 10),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )
# Panels j-l: room counts. These columns are factors whose labels are the
# counts ("0", "1", ...). as.numeric() on a factor returns the internal
# level codes (1, 2, ...), which shifts every count, so the labels are
# converted via as.character() first. The conversion is also safe to re-run
# on a column that is already numeric.
data_factor$beds_total <- as.numeric(as.character(data_factor$beds_total))
j <- ggplot(data_factor, aes(x = beds_total)) +
  geom_density(alpha = 0.5, position = "identity", fill = very_low) +
  xlab("Number of Bedrooms") +
  ylab("") +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    text = element_text(size = 10)
  )
data_factor$bath_full <- as.numeric(as.character(data_factor$bath_full))
k <- ggplot(data_factor, aes(x = bath_full)) +
  geom_density(alpha = 0.5, position = "identity", fill = very_low) +
  xlab("Number of Full Bathrooms") +
  ylab("") +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    text = element_text(size = 10)
  )
data_factor$bath_half <- as.numeric(as.character(data_factor$bath_half))
l <- ggplot(data_factor, aes(x = bath_half)) +
  geom_density(alpha = 0.5, position = "identity", fill = very_low) +
  xlab("Number of Half Bathrooms") +
  ylab("") +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    text = element_text(size = 10)
  )
# scale_fill_manual() was dropped from j-l: fill is a fixed value, not a
# mapped aesthetic, so the scale had no effect.
# Arrange all twelve panels on a 4 x 3 grid.
gridExtra::grid.arrange(a, b, c, d, e, f, g, h, i, j, k, l, nrow = 4, ncol = 3)
Warning: Removed 16 rows containing non-finite values (stat_density).
Warning: Removed 2 rows containing non-finite values (stat_density).
# Interaction model: sold price on pool, infection period and their
# interaction. `pool * infections_period` expands to both main effects plus
# the interaction, so the terms are not listed twice.
# (A leftover merge-conflict marker that preceded this chunk was removed;
# it is not valid R.)
lm_ucla <- lm(sold_price ~ pool * infections_period, data = data_factor)
summ(lm_ucla)
MODEL INFO:
Observations: 24412
Dependent Variable: sold_price
Type: OLS linear regression
MODEL FIT:
F(3,24408) = 635.35, p = 0.00
R² = 0.07
Adj. R² = 0.07
Standard errors: OLS
--------------------------------------------------------------------
Est. S.E. t val. p
------------------------------ ----------- --------- -------- ------
(Intercept) 154123.97 636.84 242.02 0.00
pool1 53118.17 2271.44 23.39 0.00
infections_period1 41724.75 1258.93 33.14 0.00
pool1:infections_period1 -7766.40 4259.65 -1.82 0.07
--------------------------------------------------------------------
# load package
# sjPlot provides tab_model(); sjmisc and sjlabelled are loaded with it.
library(sjPlot)
library(sjmisc)
library(sjlabelled)
# Print lm_ucla as a formatted regression table.
tab_model(lm_ucla)
end of document